Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp       1019
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp           47
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp            222
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp              166
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp 1133
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp           84
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp                160
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp       121
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp           1058
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp           433
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp                256
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp               3234
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp    279
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp                      118
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp       937
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp             100
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp             110
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp                  741
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Internalize.cpp              269
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp            310
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp            1055
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp          1880
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp          183
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp       733
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp                  273
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp           1265
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp       84
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp             363
28 files changed, 16633 insertions(+), 0 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
new file mode 100644
index 0000000..0e05129
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -0,0 +1,1019 @@
+//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes "by reference" arguments to be "by value" arguments. In
+// practice, this means looking for internal functions that have pointer
+// arguments. If it can prove, through the use of alias analysis, that an
+// argument is *only* loaded, then it can pass the value into the function
+// instead of the address of the value. This can cause recursive simplification
+// of code and lead to the elimination of allocas (especially in C++ template
+// code like the STL).
+//
+// This pass also handles aggregate arguments that are passed into a function,
+// scalarizing them if the elements of the aggregate are only loaded. Note that
+// by default it refuses to scalarize aggregates which would require passing in
+// more than three operands to the function, because passing thousands of
+// operands for a large array or structure is unprofitable! This limit can be
+// configured or disabled, however.
+//
+// Note that this transformation could also be done for arguments that are
+// only stored to (returning the value instead), but this is not currently
+// implemented. That case would be best handled when and if LLVM begins
+// supporting multiple return values from functions.
+//
+//===----------------------------------------------------------------------===//
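+//
+// For illustration (this sketch is not part of the original header), the
+// transformation on LLVM IR looks roughly like this, assuming an internal
+// callee whose pointer argument is only loaded:
+//
+//   ; Before: %p is only read, so the load can be hoisted to the caller.
+//   define internal i32 @callee(i32* %p) {
+//     %v = load i32, i32* %p
+//     ret i32 %v
+//   }
+//
+//   ; After: each caller loads the value and passes it by value.
+//   define internal i32 @callee(i32 %p.val) {
+//     ret i32 %p.val
+//   }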
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+using namespace llvm;
+
+#define DEBUG_TYPE "argpromotion"
+
+STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
+STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+
+namespace {
+ /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+ ///
+ struct ArgPromotion : public CallGraphSCCPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned maxElements = 3)
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// A vector used to hold the indices of a single GEP instruction
+ typedef std::vector<uint64_t> IndicesVector;
+
+ private:
+ bool isDenselyPacked(Type *type, const DataLayout &DL);
+ bool canPaddingBeAccessed(Argument *Arg);
+ CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+ bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+ AAResults &AAR) const;
+ CallGraphNode *DoPromotion(Function *F,
+ SmallPtrSetImpl<Argument*> &ArgsToPromote,
+ SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
+
+ using llvm::Pass::doInitialization;
+ bool doInitialization(CallGraph &CG) override;
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
+ };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+ return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ bool Changed = false, LocalChange;
+
+ do { // Iterate until we stop promoting from this SCC.
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if (CallGraphNode *CGN = PromoteArguments(*I)) {
+ LocalChange = true;
+ SCC.ReplaceNode(*I, CGN);
+ }
+ }
+ Changed |= LocalChange; // Remember that we changed something.
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// \brief Returns true if the type has no padding bytes (is densely packed).
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
+
+ // There is no size information, so be conservative.
+ if (!type->isSized())
+ return false;
+
+  // If the alloc size is not equal to the storage size, then there are padding
+  // bytes. For example, x86_fp80 on x86-64 has a size of 80 bits but an alloc
+  // size of 128 bits.
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+ return false;
+
+ if (!isa<CompositeType>(type))
+ return true;
+
+  // For homogeneous sequential types, check for padding within members.
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ return isa<PointerType>(seqTy) ||
+ isDenselyPacked(seqTy->getElementType(), DL);
+
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(type);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+ Type *ElTy = StructTy->getElementType(i);
+ if (!isDenselyPacked(ElTy, DL))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(i))
+ return false;
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
+ }
+
+ return true;
+}
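+
+// For illustration (not from the original source): on a typical 64-bit
+// target, { i32, i64 } is not densely packed (4 padding bytes follow the
+// i32), while { i32, i32 } and [4 x i8] are.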
+
+/// \brief Checks if the padding bytes of an argument could be accessed.
+bool ArgPromotion::canPaddingBeAccessed(Argument *arg) {
+
+ assert(arg->hasByValAttr());
+
+ // Track all the pointers to the argument to make sure they are not captured.
+ SmallPtrSet<Value *, 16> PtrValues;
+ PtrValues.insert(arg);
+
+ // Track all of the stores.
+ SmallVector<StoreInst *, 16> Stores;
+
+ // Scan through the uses recursively to make sure the pointer is always used
+ // sanely.
+ SmallVector<Value *, 16> WorkList;
+ WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+ if (PtrValues.insert(V).second)
+ WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+ Stores.push_back(Store);
+ } else if (!isa<LoadInst>(V)) {
+ return true;
+ }
+ }
+
+  // Check to make sure the pointers aren't captured.
+ for (StoreInst *Store : Stores)
+ if (PtrValues.count(Store->getValueOperand()))
+ return true;
+
+ return false;
+}
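+
+// For illustration (not from the original source), the capture check above
+// fires on code like this, where the byval pointer itself is stored away and
+// the padding could be inspected through the escaped copy later:
+//
+//   define internal void @f(%struct.S* byval %s, %struct.S** %out) {
+//     store %struct.S* %s, %struct.S** %out  ; %s escapes -> return true
+//     ret void
+//   }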
+
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+ Function *F = CGN->getFunction();
+
+ // Make sure that it is local to this module.
+ if (!F || !F->hasLocalLinkage()) return nullptr;
+
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<Argument*, 16> PointerArgs;
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
+ if (PointerArgs.empty()) return nullptr;
+
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ // Must be a direct call.
+ if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
+
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
+
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+  // We need to construct BasicAA manually in order to disable its use of
+  // other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument*, 8> ArgsToPromote;
+ SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+ for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
+ Argument *PtrArg = PointerArgs[i];
+ Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+ // Replace sret attribute with noalias. This reduces register pressure by
+ // avoiding a register copy.
+ if (PtrArg->hasStructRetAttr()) {
+ unsigned ArgNo = PtrArg->getArgNo();
+ F->setAttributes(
+ F->getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ CS.setAttributes(
+ CS.getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1,
+ Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ }
+ }
+
+    // If this is a byval argument and the aggregate type is small, just pass
+    // the elements directly; this is always safe if the passed value is
+    // densely packed or if we can prove the padding bytes are never accessed.
+    // This does not apply to inalloca.
+ bool isSafeToPromote =
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+ if (isSafeToPromote) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName() << "' because it would require adding more"
+ << " than " << maxElements << " arguments to the function.\n");
+ continue;
+ }
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
+ AllSimple = false;
+ break;
+ }
+ }
+
+        // Safe to transform; don't even bother trying to "promote" it.
+        // Passing the elements as scalars will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
+ }
+ }
+
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
+ }
+ }
+
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return nullptr;
+
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callers pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+ Function *Callee = Arg->getParent();
+ const DataLayout &DL = Callee->getParent()->getDataLayout();
+
+ unsigned ArgNo = Arg->getArgNo();
+
+  // Look at all call sites of the function. At this point we know we only
+  // have direct calls.
+ for (User *U : Callee->users()) {
+ CallSite CS(U);
+ assert(CS && "Should only have direct calls!");
+
+ if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+ return false;
+ }
+ return true;
+}
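+
+// For illustration (not from the original source): an argument like
+//   call void @f(i32* @g)   ; @g is a global, known dereferenceable
+// passes this check at that call site, whereas a pointer of unknown
+// provenance (e.g. one loaded from memory) does not, and promotion then
+// relies on a load in the callee's entry block instead.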
+
+/// Returns true if Prefix is a prefix of Longer. That is, Longer has a size
+/// that is greater than or equal to the size of Prefix, and each of the
+/// elements in Prefix is the same as the corresponding element in Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+ const ArgPromotion::IndicesVector &Longer) {
+ if (Prefix.size() > Longer.size())
+ return false;
+ return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
+}
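+
+// For illustration (not from the original source):
+//   IsPrefix({0},    {0, 1}) == true
+//   IsPrefix({0, 1}, {0, 1}) == true   (equal vectors count as prefixes)
+//   IsPrefix({1},    {0, 1}) == false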
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+ std::set<ArgPromotion::IndicesVector> &Set) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+  // Low is now the last element smaller than or equal to Indices. This means
+  // it points to a prefix of Indices (possibly Indices itself), if such a
+  // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
+/// already. Furthermore, any indices that ToMark is itself a prefix of are
+/// removed from Safe (since they are now implicitly safe because of ToMark).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+ std::set<ArgPromotion::IndicesVector> &Safe) {
+ std::set<ArgPromotion::IndicesVector>::iterator Low;
+ Low = Safe.upper_bound(ToMark);
+ // Guard against the case where Safe is empty
+ if (Low != Safe.begin())
+ Low--;
+  // Low is now the last element smaller than or equal to ToMark. This
+  // means it points to a prefix of ToMark (possibly ToMark itself), if
+  // such a prefix exists.
+ if (Low != Safe.end()) {
+ if (IsPrefix(*Low, ToMark))
+ // If there is already a prefix of these indices (or exactly these
+      // indices) marked as safe, don't bother adding these indices.
+ return;
+
+    // Increment Low, so we can use it as an "insert before" hint.
+ ++Low;
+ }
+ // Insert
+ Low = Safe.insert(Low, ToMark);
+ ++Low;
+  // If ToMark was a prefix of longer index lists, remove them.
+ std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ while (Low != End && IsPrefix(ToMark, *Low)) {
+ std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ ++Low;
+ Safe.erase(Remove);
+ }
+}
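+
+// For illustration (not from the original source): with Safe = {{0,1}, {2}},
+// marking {0} as safe erases the now-redundant {0,1}, leaving {{0}, {2}};
+// marking {0,3} afterwards is a no-op because its prefix {0} is present.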
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to at most maxElements
+/// elements of the aggregate (three by default) in order to avoid exploding
+/// the number of arguments passed in.
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
+ bool isByValOrInAlloca,
+ AAResults &AAR) const {
+ typedef std::set<IndicesVector> GEPIndicesSet;
+
+ // Quick exit for unused arguments
+ if (Arg->use_empty())
+ return true;
+
+ // We can only promote this argument if all of the uses are loads, or are GEP
+ // instructions (with constant indices) that are subsequently loaded.
+ //
+ // Promoting the argument causes it to be loaded in the caller
+ // unconditionally. This is only safe if we can prove that either the load
+  // would have happened in the callee anyway (i.e., there is a load in the
+ // block) or the pointer passed in at every call site is guaranteed to be
+ // valid.
+ // In the former case, invalid loads can happen, but would have happened
+  // anyway; in the latter case, invalid loads won't happen. This prevents us
+ // from introducing an invalid load that wouldn't have happened in the
+ // original code.
+ //
+ // This set will contain all sets of indices that are loaded in the entry
+ // block, and thus are safe to unconditionally load in the caller.
+ //
+ // This optimization is also safe for InAlloca parameters, because it verifies
+ // that the address isn't captured.
+ GEPIndicesSet SafeToUnconditionallyLoad;
+
+ // This set contains all the sets of indices that we are planning to promote.
+ // This makes it possible to limit the number of arguments added.
+ GEPIndicesSet ToPromote;
+
+ // If the pointer is always valid, any load with first index 0 is valid.
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ // First, iterate the entry block and mark loads of (geps of) arguments as
+ // safe.
+ BasicBlock &EntryBlock = Arg->getParent()->front();
+ // Declare this here so we can reuse it
+ IndicesVector Indices;
+ for (Instruction &I : EntryBlock)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Value *V = LI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ V = GEP->getPointerOperand();
+ if (V == Arg) {
+ // This load actually loads (part of) Arg? Check the indices then.
+ Indices.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
+ Indices.push_back(CI->getSExtValue());
+ else
+ // We found a non-constant GEP index for this argument? Bail out
+ // right away, can't promote this argument at all.
+ return false;
+
+ // Indices checked out, mark them as safe
+ MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ Indices.clear();
+ }
+ } else if (V == Arg) {
+ // Direct loads are equivalent to a GEP with a single 0 index.
+ MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ }
+ }
+
+ // Now, iterate all uses of the argument to see if there are any uses that are
+ // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ SmallVector<LoadInst*, 16> Loads;
+ IndicesVector Operands;
+ for (Use &U : Arg->uses()) {
+ User *UR = U.getUser();
+ Operands.clear();
+ if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
+ Loads.push_back(LI);
+ // Direct loads are equivalent to a GEP with a zero index and then a load.
+ Operands.push_back(0);
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
+ if (GEP->use_empty()) {
+        // Dead GEPs cause trouble later. Just remove them if we run into
+ // them.
+ GEP->eraseFromParent();
+        // TODO: This runs the above loop over and over again for dead GEPs.
+        // Couldn't we just increment the UI iterator earlier and erase the
+ // use?
+ return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR);
+ }
+
+ // Ensure that all of the indices are constants.
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
+ Operands.push_back(C->getSExtValue());
+ else
+ return false; // Not a constant operand GEP!
+
+ // Ensure that the only users of the GEP are load instructions.
+ for (User *GEPU : GEP->users())
+ if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
+ Loads.push_back(LI);
+ } else {
+ // Other uses than load?
+ return false;
+ }
+ } else {
+ return false; // Not a load or a GEP.
+ }
+
+ // Now, see if it is safe to promote this load / loads of this GEP. Loading
+ // is safe if Operands, or a prefix of Operands, is marked as safe.
+ if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ return false;
+
+    // See if we are already promoting a load with these indices. If so, there
+    // is nothing to do; if not, make sure that we aren't promoting too many
+    // elements.
+ if (ToPromote.find(Operands) == ToPromote.end()) {
+ if (maxElements > 0 && ToPromote.size() == maxElements) {
+ DEBUG(dbgs() << "argpromotion not promoting argument '"
+ << Arg->getName() << "' because it would require adding more "
+ << "than " << maxElements << " arguments to the function.\n");
+ // We limit aggregate promotion to only promoting up to a fixed number
+ // of elements of the aggregate.
+ return false;
+ }
+ ToPromote.insert(std::move(Operands));
+ }
+ }
+
+ if (Loads.empty()) return true; // No users, this is a dead argument.
+
+ // Okay, now we know that the argument is only used by load instructions and
+ // it is safe to unconditionally perform all of them. Use alias analysis to
+ // check to see if the pointer is guaranteed to not be modified from entry of
+ // the function to each of the load instructions.
+
+ // Because there could be several/many load instructions, remember which
+ // blocks we know to be transparent to the load.
+ SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+
+ for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ // Check to see if the load is invalidated from the start of the block to
+ // the load itself.
+ LoadInst *Load = Loads[i];
+ BasicBlock *BB = Load->getParent();
+
+ MemoryLocation Loc = MemoryLocation::get(Load);
+ if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
+ return false; // Pointer is invalidated!
+
+ // Now check every path from the entry block to the load for transparency.
+ // To do this, we perform a depth first search on the inverse CFG from the
+ // loading block.
+ for (BasicBlock *P : predecessors(BB)) {
+ for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
+ if (AAR.canBasicBlockModify(*TranspBB, Loc))
+ return false;
+ }
+ }
+
+ // If the path from the entry of the function to each load is free of
+ // instructions that potentially invalidate the load, we can make the
+ // transformation!
+ return true;
+}
+
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+CallGraphNode *ArgPromotion::DoPromotion(Function *F,
+ SmallPtrSetImpl<Argument*> &ArgsToPromote,
+ SmallPtrSetImpl<Argument*> &ByValArgsToTransform) {
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
+
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
+
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
+
+  // AttributesVec - Keep track of the parameter attributes for the arguments
+  // that we are *not* promoting. For the ones that we do promote, the
+  // parameter attributes are lost.
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &PAL = F->getAttributes();
+
+ // Add any return attributes.
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getRetAttributes()));
+
+ // First, determine the new argument list
+ unsigned ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(&*I)) {
+ // Simple byval argument? Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(&*I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes(ArgIndex)) {
+ AttrBuilder B(attrs, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
+ } else if (I->use_empty()) {
+      // Dead argument (dead arguments are always marked as promotable).
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
+
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
+ IndicesVector Indices;
+ Indices.reserve(UI->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ OrigLoad = L;
+ else
+          // Take any load; we will use it only to update alias analysis.
+ OrigLoad = cast<LoadInst>(UI->user_back());
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
+ }
+
+ // Add a parameter to the function for each element passed in.
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+        // Not allowed to dereference ->begin() if size() is 0.
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ SI->second));
+ assert(Params.back());
+ }
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
+
+ // Add any function attributes.
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
+ PAL.getFnAttributes()));
+
+ Type *RetTy = FTy->getReturnType();
+
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
+
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
+
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
+ AttributesVec.clear();
+
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+ NF->takeName(F);
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+ // Get a new callgraph node for NF.
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value*, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->user_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttributeSet &CallPAL = CS.getAttributes();
+
+ // Add any return attributes.
+ if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ CallPAL.getRetAttributes()));
+
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Args.push_back(*AI); // Unmodified argument
+
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ } else if (ByValArgsToTransform.count(&*I)) {
+ // Emit a GEP and load for each element of the struct.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value*> Ops;
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(),
+ E = ArgIndices.end(); SI != E; ++SI) {
+ Value *V = *AI;
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)];
+ if (!SI->second.empty()) {
+ Ops.reserve(SI->second.size());
+ Type *ElTy = V->getType();
+ for (IndicesVector::const_iterator II = SI->second.begin(),
+ IE = SI->second.end();
+ II != IE; ++II) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ Type *IdxTy = (ElTy->isStructTy() ?
+ Type::getInt32Ty(F->getContext()) :
+ Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, *II));
+ // Keep track of the type we're currently indexing.
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(SI->first, V, Ops,
+ V->getName() + ".idx", Call);
+ Ops.clear();
+ }
+          // Since we're replacing a load, make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the AA info too.
+ AAMDNodes AAInfo;
+ OrigLoad->getAAMetadata(AAInfo);
+ newLoad->setAAMetadata(AAInfo);
+
+ Args.push_back(newLoad);
+ }
+ }
+
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ }
+
+ // Add any function attributes.
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
+ AttributesVec));
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
+ AttributesVec));
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+ Args.clear();
+ AttributesVec.clear();
+
+ // Update the callgraph to know that the callsite has been transformed.
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
+ CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
+
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+ // Loop over the argument list, transferring uses of the old arguments over to
+  // the new arguments, and transferring the names over as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
+ ++I2;
+ continue;
+ }
+
+ if (ByValArgsToTransform.count(&*I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = &NF->begin()->front();
+
+ // Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
+ I2->setName(I->getName()+"."+Twine(i));
+ new StoreInst(&*I2++, Idx, InsertPt);
+ }
+
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&*I);
+
+ // If the alloca is used in a call, we must clear the tail flag since
+ // the callee now uses an alloca from the caller.
+ for (User *U : TheAlloca->users()) {
+ CallInst *Call = dyn_cast<CallInst>(U);
+ if (!Call)
+ continue;
+ Call->setTailCall(false);
+ }
+ continue;
+ }
+
+ if (I->use_empty())
+ continue;
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ assert(ArgIndices.begin()->second.empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName()+".val");
+ LI->replaceAllUsesWith(&*I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ It->second != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
+ }
+
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
+ }
+ NewName += ".val";
+ TheArg->setName(NewName);
+
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->user_back());
+ L->replaceAllUsesWith(&*TheArg);
+ L->eraseFromParent();
+ }
+ GEP->eraseFromParent();
+ }
+ }
+
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ std::advance(I2, ArgIndices.size());
+ }
+
+ NF_CGN->stealCalledFunctionsFrom(CG[F]);
+
+ // Now that the old function is dead, delete it. If there is a dangling
+  // reference to the CallGraphNode, just leave the dead function around for
+ // someone else to nuke.
+ CallGraphNode *CGN = CG[F];
+ if (CGN->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(CGN);
+ else
+ F->setLinkage(Function::ExternalLinkage);
+
+ return NF_CGN;
+}
+
+bool ArgPromotion::doInitialization(CallGraph &CG) {
+ return CallGraphSCCPass::doInitialization(CG);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
new file mode 100644
index 0000000..6af1043
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -0,0 +1,47 @@
+//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// NOTE: DO NOT USE THIS IF AVOIDABLE
+//
+// This pass is a nonce pass intended to allow manipulation of the implicitly
+// nesting pass manager. For example, it can be used to cause a CGSCC pass
+// manager to be closed prior to running a new collection of function passes.
+//
+// FIXME: This is a huge HACK. This should be removed when the pass manager's
+// nesting is made explicit instead of implicit.
+//
+//===----------------------------------------------------------------------===//
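+
+// For illustration (not part of the original header), a sketch of the
+// intended use with the legacy pass manager; the pass-creation functions
+// other than createBarrierNoopPass are hypothetical placeholders:
+//
+//   legacy::PassManager PM;
+//   PM.add(createSomeCGSCCPass());    // hypothetical; opens a CGSCC manager
+//   PM.add(createBarrierNoopPass());  // module pass; closes that manager
+//   PM.add(createSomeFunctionPass()); // hypothetical; now runs in a fresh
+//                                     // function pass manager afterwards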
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+/// \brief A nonce module pass used to place a barrier in a pass manager.
+///
+/// There is no mechanism for ending a CGSCC pass manager once one is started.
+/// This prevents extension points from having clear deterministic ordering
+/// when they are phrased as non-module passes.
+class BarrierNoop : public ModulePass {
+public:
+ static char ID; // Pass identification.
+
+ BarrierNoop() : ModulePass(ID) {
+ initializeBarrierNoopPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override { return false; }
+};
+}
+
+ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
+
+char BarrierNoop::ID = 0;
+INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass",
+ false, false)
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
new file mode 100644
index 0000000..0aa49d6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -0,0 +1,222 @@
+//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface to a pass that merges duplicate global
+// constants together into a single constant that is shared. This is useful
+// because some passes (e.g., TraceValues) insert a lot of string constants into
+// the program, regardless of whether or not an existing string is available.
+//
+// Algorithm: ConstantMerge is designed to build up a map of available constants
+// and eliminate duplicates when it runs.
+//
+//===----------------------------------------------------------------------===//
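+
+// For illustration (this example is not part of the original header): two
+// private constants with identical initializers, such as
+//
+//   @.str.1 = private unnamed_addr constant [4 x i8] c"abc\00"
+//   @.str.2 = private unnamed_addr constant [4 x i8] c"abc\00"
+//
+// are collapsed into one global, and every use of @.str.2 is rewritten to
+// refer to @.str.1.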
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "constmerge"
+
+STATISTIC(NumMerged, "Number of global constants merged");
+
+namespace {
+ struct ConstantMerge : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ // For this pass, process all of the globals in the module, eliminating
+ // duplicate constants.
+ bool runOnModule(Module &M) override;
+
+ // Return true iff we can determine the alignment of this global variable.
+ bool hasKnownAlignment(GlobalVariable *GV) const;
+
+ // Return the alignment of the global, including converting the default
+ // alignment to a concrete value.
+ unsigned getAlignment(GlobalVariable *GV) const;
+
+ };
+}
+
+char ConstantMerge::ID = 0;
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false)
+
+ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
+ if (!LLVMUsed) return;
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+ Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ GlobalValue *GV = cast<GlobalValue>(Operand);
+ UsedValues.insert(GV);
+ }
+}
+
+// True if A is a better canonical constant to keep than B.
+static bool IsBetterCanonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
+ if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+ return true;
+
+ if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+ return false;
+
+ return A.hasUnnamedAddr();
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ unsigned Align = GV->getAlignment();
+ if (Align)
+ return Align;
+ return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+ // Map unique constants to globals.
+ DenseMap<Constant *, GlobalVariable *> CMap;
+
+ // Replacements - This vector contains a list of replacements to perform.
+ SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+ bool MadeChange = false;
+
+ // Iterate constant merging while we are still making progress. Merging two
+ // constants together may allow us to merge other constants together if the
+ // second level constants have initializers which point to the globals that
+ // were just merged.
+ while (1) {
+
+ // First: Find the canonical constants others will be merged with.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = &*GVI++;
+
+ // If this GV is dead, remove it.
+ GV->removeDeadConstantUsers();
+ if (GV->use_empty() && GV->hasLocalLinkage()) {
+ GV->eraseFromParent();
+ continue;
+ }
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ // This transformation is legal for weak ODR globals in the sense it
+ // doesn't change semantics, but we really don't want to perform it
+ // anyway; it's likely to pessimize code generation, and some tools
+ // (like the Darwin linker in cases involving CFString) don't expect it.
+ if (GV->isWeakForLinker())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *&Slot = CMap[Init];
+
+      // If this is the first constant we find or if the old one is local,
+      // replace with the current one. If the current one is externally
+      // visible it cannot be replaced, but it can be the canonical constant
+      // we merge with.
+ if (!Slot || IsBetterCanonical(*GV, *Slot))
+ Slot = GV;
+ }
+
+ // Second: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = &*GVI++;
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+      // We can only replace constants with local linkage.
+ if (!GV->hasLocalLinkage())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *Slot = CMap[Init];
+
+ if (!Slot || Slot == GV)
+ continue;
+
+ if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ continue;
+
+ if (!GV->hasUnnamedAddr())
+ Slot->setUnnamedAddr(false);
+
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+
+ if (Replacements.empty())
+ return MadeChange;
+ CMap.clear();
+
+    // Now that we have figured out which replacements must be made, do them
+    // all now. This avoids invalidating the pointers in CMap, which are no
+    // longer needed.
+ for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Bump the alignment if necessary.
+ if (Replacements[i].first->getAlignment() ||
+ Replacements[i].second->getAlignment()) {
+ Replacements[i].second->setAlignment(
+ std::max(getAlignment(Replacements[i].first),
+ getAlignment(Replacements[i].second)));
+ }
+
+ // Eliminate any uses of the dead global.
+ Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+ // Delete the global value from the module.
+ assert(Replacements[i].first->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
+ Replacements[i].first->eraseFromParent();
+ }
+
+ NumMerged += Replacements.size();
+ Replacements.clear();
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
new file mode 100644
index 0000000..5bbb751
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -0,0 +1,166 @@
+//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exports all llvm.bitsets found in the module in the form of a
+// __cfi_check function, which can be used to verify cross-DSO call targets.
+//
+//===----------------------------------------------------------------------===//
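+
+// For illustration (not part of the original header), the emitted function
+// has roughly this shape, with one switch case per exported type id:
+//
+//   define void @__cfi_check(i64 %CallSiteTypeId, i8* %Addr) {
+//   entry:
+//     switch i64 %CallSiteTypeId, label %trap [ i64 1234, label %test ]
+//   test:
+//     ; call llvm.bitset.test on %Addr, then branch to %exit or %trap
+//   trap:
+//     ; call llvm.trap, unreachable
+//   exit:
+//     ret void
+//   }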
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cross-dso-cfi"
+
+STATISTIC(TypeIds, "Number of unique type identifiers");
+
+namespace {
+
+struct CrossDSOCFI : public ModulePass {
+ static char ID;
+ CrossDSOCFI() : ModulePass(ID) {
+ initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+ MDNode *VeryLikelyWeights;
+
+ ConstantInt *extractBitSetTypeId(MDNode *MD);
+ void buildCFICheck();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false,
+ false)
+INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false)
+char CrossDSOCFI::ID = 0;
+
+ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; }
+
+bool CrossDSOCFI::doInitialization(Module &Mod) {
+ M = &Mod;
+ VeryLikelyWeights =
+ MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1);
+
+ return false;
+}
+
+/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode.
+ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) {
+ // This check excludes vtables for classes inside anonymous namespaces.
+ auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ if (!TM)
+ return nullptr;
+ auto C = dyn_cast_or_null<ConstantInt>(TM->getValue());
+ if (!C) return nullptr;
+ // We are looking for i64 constants.
+ if (C->getBitWidth() != 64) return nullptr;
+
+ // Sanity check.
+ auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1));
+ // Can be null if a function was removed by an optimization.
+ if (FM) {
+ auto F = dyn_cast<Function>(FM->getValue());
+ // But can never be a function declaration.
+ assert(!F || !F->isDeclaration());
+ (void)F; // Suppress unused variable warning in the no-asserts build.
+ }
+ return C;
+}
+
+/// buildCFICheck - Emits __cfi_check for the current module.
+void CrossDSOCFI::buildCFICheck() {
+ // FIXME: verify that __cfi_check ends up near the end of the code section,
+ // but before the jump slots created in LowerBitSets.
+ llvm::DenseSet<uint64_t> BitSetIds;
+ NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ if (BitSetNM)
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I)
+ if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I)))
+ BitSetIds.insert(TypeId->getZExtValue());
+
+ LLVMContext &Ctx = M->getContext();
+ Constant *C = M->getOrInsertFunction(
+ "__cfi_check",
+ FunctionType::get(
+ Type::getVoidTy(Ctx),
+ {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))},
+ false));
+ Function *F = dyn_cast<Function>(C);
+ F->setAlignment(4096);
+ auto args = F->arg_begin();
+ Argument &CallSiteTypeId = *(args++);
+ CallSiteTypeId.setName("CallSiteTypeId");
+ Argument &Addr = *(args++);
+ Addr.setName("Addr");
+ assert(args == F->arg_end());
+
+ BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
+
+ BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F);
+ IRBuilder<> IRBTrap(TrapBB);
+ Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
+ llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ IRBTrap.CreateUnreachable();
+
+ BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);
+ IRBuilder<> IRBExit(ExitBB);
+ IRBExit.CreateRetVoid();
+
+ IRBuilder<> IRB(BB);
+ SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size());
+ for (uint64_t TypeId : BitSetIds) {
+ ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId);
+ BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F);
+ IRBuilder<> IRBTest(TestBB);
+ Function *BitsetTestFn =
+ Intrinsic::getDeclaration(M, Intrinsic::bitset_test);
+
+ Value *Test = IRBTest.CreateCall(
+ BitsetTestFn, {&Addr, MetadataAsValue::get(
+ Ctx, ConstantAsMetadata::get(CaseTypeId))});
+ BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB);
+ BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights);
+
+ SI->addCase(CaseTypeId, TestBB);
+ ++TypeIds;
+ }
+}
+
+bool CrossDSOCFI::runOnModule(Module &M) {
+ if (M.getModuleFlag("Cross-DSO CFI") == nullptr)
+ return false;
+ buildCFICheck();
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
new file mode 100644
index 0000000..4de3d95
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -0,0 +1,1133 @@
+//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass deletes dead arguments from internal functions. Dead argument
+// elimination removes arguments which are directly dead, as well as arguments
+// only passed into function calls as dead arguments of other functions. This
+// pass also deletes dead return values in a similar way.
+//
+// This pass is often useful as a cleanup pass to run after aggressive
+// interprocedural passes, which add possibly-dead arguments or return values.
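+//
+// For example (an illustrative sketch), given an internal function whose
+// second argument is never used:
+//
+//   define internal i32 @f(i32 %x, i32 %unused) {
+//     ret i32 %x
+//   }
+//
+// the pass rewrites the function and all of its call sites to:
+//
+//   define internal i32 @f(i32 %x) {
+//     ret i32 %x
+//   }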
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <map>
+#include <set>
+#include <tuple>
+using namespace llvm;
+
+#define DEBUG_TYPE "deadargelim"
+
+STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
+STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
+STATISTIC(NumArgumentsReplacedWithUndef,
+ "Number of unread args replaced with undef");
+namespace {
+ /// DAE - The dead argument elimination pass.
+ ///
+ class DAE : public ModulePass {
+ public:
+
+ /// Struct that represents (part of) either a return value or a function
+ /// argument. Used so that arguments and return values can be used
+ /// interchangeably.
+ struct RetOrArg {
+ RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ IsArg(IsArg) {}
+ const Function *F;
+ unsigned Idx;
+ bool IsArg;
+
+ /// Make RetOrArg comparable, so we can put it into a map.
+ bool operator<(const RetOrArg &O) const {
+ return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg);
+ }
+
+ /// Make RetOrArg comparable, so we can easily iterate the multimap.
+ bool operator==(const RetOrArg &O) const {
+ return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
+ }
+
+ std::string getDescription() const {
+ return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) +
+ " of function " + F->getName()).str();
+ }
+ };
+
+ /// Liveness enum - During our initial pass over the program, we determine
+ /// that things are either alive or maybe alive. We don't mark anything
+    /// explicitly dead (even if we know it is), since anything not alive
+ /// with no registered uses (in Uses) will never be marked alive and will
+ /// thus become dead in the end.
+ enum Liveness { Live, MaybeLive };
+
+ /// Convenience wrapper
+ RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, false);
+ }
+ /// Convenience wrapper
+ RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ return RetOrArg(F, Idx, true);
+ }
+
+ typedef std::multimap<RetOrArg, RetOrArg> UseMap;
+ /// This maps a return value or argument to any MaybeLive return values or
+    /// arguments it uses. This allows a MaybeLive value to be marked live
+    /// when any of its users is marked live.
+ /// For example (indices are left out for clarity):
+ /// - Uses[ret F] = ret G
+ /// This means that F calls G, and F returns the value returned by G.
+ /// - Uses[arg F] = ret G
+ /// This means that some function calls G and passes its result as an
+ /// argument to F.
+ /// - Uses[ret F] = arg F
+ /// This means that F returns one of its own arguments.
+ /// - Uses[arg F] = arg G
+ /// This means that G calls F and passes one of its own (G's) arguments
+ /// directly to F.
+ UseMap Uses;
+
+ typedef std::set<RetOrArg> LiveSet;
+ typedef std::set<const Function*> LiveFuncSet;
+
+ /// This set contains all values that have been determined to be live.
+ LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+ LiveFuncSet LiveFunctions;
+
+ typedef SmallVector<RetOrArg, 5> UseVector;
+
+ protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ virtual bool ShouldHackArguments() const { return false; }
+
+ private:
+ Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+ unsigned RetValNum = -1U);
+ Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+ void SurveyFunction(const Function &F);
+ void MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses);
+ void MarkLive(const RetOrArg &RA);
+ void MarkLive(const Function &F);
+ void PropagateLiveness(const RetOrArg &RA);
+ bool RemoveDeadStuffFromFunction(Function *F);
+ bool DeleteDeadVarargs(Function &Fn);
+ bool RemoveDeadArgumentsFromCallers(Function &Fn);
+ };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
+
+namespace {
+ /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+ /// deletes arguments to functions which are external. This is only for use
+ /// by bugpoint.
+ struct DAH : public DAE {
+ static char ID;
+ DAH() : DAE(ID) {}
+
+ bool ShouldHackArguments() const override { return true; }
+ };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false)
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
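+///
+/// For example (an illustrative sketch), an internal function
+///
+///   define internal void @f(i32 %a, ...) { ... }
+///
+/// that never calls llvm.va_start is rewritten, together with its direct
+/// callers, to the non-variadic
+///
+///   define internal void @f(i32 %a) { ... }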
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+ assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+ // Ensure that the function is only directly called.
+ if (Fn.hasAddressTaken())
+ return false;
+
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked)) {
+ return false;
+ }
+
+ // Okay, we know we can transform this function if safe. Scan its body
+ // looking for calls marked musttail or calls to llvm.vastart.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ continue;
+ if (CI->isMustTailCall())
+ return false;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ return false;
+ }
+ }
+ }
+
+ // If we get here, there are no calls to llvm.vastart in the function body,
+ // remove the "..." and adjust all the calls.
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but doesn't have isVarArg set.
+ FunctionType *FTy = Fn.getFunctionType();
+
+ std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+ Params, false);
+ unsigned NumArgs = Params.size();
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ NF->copyAttributesFrom(&Fn);
+ Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
+ NF->takeName(&Fn);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
+ CallSite CS(*I++);
+ if (!CS)
+ continue;
+ Instruction *Call = CS.getInstruction();
+
+ // Pass all the same arguments.
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+ // Drop any attributes that were on the vararg arguments.
+ AttributeSet PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+ SmallVector<AttributeSet, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlotAttributes(i));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+ PAL.getFnAttributes()));
+ PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+ }
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call);
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(PAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(PAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty())
+ Call->replaceAllUsesWith(New);
+
+ New->takeName(Call);
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments and transferring the names over as well.
+ //
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // Move the name and users over to the new version.
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
+ }
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NF->setSubprogram(Fn.getSubprogram());
+
+ // Fix up any BlockAddresses that refer to the function.
+ Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
+ // Delete the bitcast that we just created, so that NF does not
+ // appear to be address-taken.
+ NF->removeDeadConstantUsers();
+ // Finally, nuke the old function.
+ Fn.eraseFromParent();
+ return true;
+}
+
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and replaces the corresponding arguments at
+/// its call sites with undef.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) {
+ // We cannot change the arguments if this TU does not define the function or
+ // if the linker may choose a function body from another TU, even if the
+ // nominal linkage indicates that other copies of the function have the same
+ // semantics. In the below example, the dead load from %p may not have been
+ // eliminated from the linker-chosen copy of f, so replacing %p with undef
+ // in callers may introduce undefined behavior.
+ //
+ // define linkonce_odr void @f(i32* %p) {
+  //   %v = load i32, i32* %p
+ // ret void
+ // }
+ if (!Fn.isStrongDefinitionForLinker())
+ return false;
+
+ // Functions with local linkage should already have been handled, except the
+ // fragile (variadic) ones which we can improve here.
+ if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+ return false;
+
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (Fn.hasFnAttribute(Attribute::Naked))
+ return false;
+
+ if (Fn.use_empty())
+ return false;
+
+ SmallVector<unsigned, 8> UnusedArgs;
+ for (Argument &Arg : Fn.args()) {
+ if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ UnusedArgs.push_back(Arg.getArgNo());
+ }
+
+ if (UnusedArgs.empty())
+ return false;
+
+ bool Changed = false;
+
+ for (Use &U : Fn.uses()) {
+ CallSite CS(U.getUser());
+ if (!CS || !CS.isCallee(&U))
+ continue;
+
+ // Now go through all unused args and replace them with "undef".
+ for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+ unsigned ArgNo = UnusedArgs[I];
+
+ Value *Arg = CS.getArgument(ArgNo);
+ CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ ++NumArgumentsReplacedWithUndef;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// Convenience function that returns the number of return values. It returns
+/// 0 for void functions and 1 for functions returning a single value. For
+/// functions returning a struct or array, it returns the number of elements.
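+/// For example: void -> 0, i32 -> 1, {i32, float} -> 2, and [4 x i32] -> 4.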
+static unsigned NumRetVals(const Function *F) {
+ Type *RetTy = F->getReturnType();
+ if (RetTy->isVoidTy())
+ return 0;
+ else if (StructType *STy = dyn_cast<StructType>(RetTy))
+ return STy->getNumElements();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getNumElements();
+ else
+ return 1;
+}
+
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+ Type *RetTy = F->getReturnType();
+ assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
+ return STy->getElementType(Idx);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getElementType();
+ else
+ return RetTy;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+ // We're live if our use or its Function is already marked as live.
+ if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+ return Live;
+
+ // We're maybe live otherwise, but remember that we must become live if
+ // Use becomes live.
+ MaybeLiveUses.push_back(Use);
+ return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion; callers should always
+/// leave it at its default value.
+DAE::Liveness DAE::SurveyUse(const Use *U,
+ UseVector &MaybeLiveUses, unsigned RetValNum) {
+ const User *V = U->getUser();
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // original Use.
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = CreateRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ } else {
+ DAE::Liveness Result = MaybeLive;
+ for (unsigned i = 0; i < NumRetVals(F); ++i) {
+ RetOrArg Use = CreateRet(F, i);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
+ }
+ return Result;
+ }
+ }
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ && IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (const Use &UU : IV->uses()) {
+ Result = SurveyUse(&UU, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+ }
+
+ if (auto CS = ImmutableCallSite(V)) {
+ const Function *F = CS.getCalledFunction();
+ if (F) {
+ // Used in a direct call.
+
+ // The function argument is live if it is used as a bundle operand.
+ if (CS.isBundleOperand(U))
+ return Live;
+
+ // Find the argument number. We know for sure that this use is an
+      // argument, since if it were the called function this would be an
+      // indirect call, and we know we can't be looking at a value of the
+      // label type (for the invoke instruction).
+ unsigned ArgNo = CS.getArgumentNo(U);
+
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
+
+ assert(CS.getArgument(ArgNo)
+ == CS->getOperand(U->getOperandNo())
+ && "Argument is not where we expected it");
+
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = CreateArg(F, ArgNo);
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ }
+ }
+ // Used in any other way? Value must be live.
+ return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value and returns the
+/// Liveness deduced from those uses.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+  // Assume it's dead (which will only hold if there are no uses at all).
+ Liveness Result = MaybeLive;
+ // Check each use.
+ for (const Use &U : V->uses()) {
+ Result = SurveyUse(&U, MaybeLiveUses);
+ if (Result == Live)
+ break;
+ }
+ return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking whether it uses any of its incoming arguments or whether
+// any callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+ // Functions with inalloca parameters are expecting args in a particular
+ // register and memory layout.
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+ MarkLive(F);
+ return;
+ }
+
+ // Don't touch naked functions. The assembly might be using an argument, or
+ // otherwise rely on the frame layout in a way that this analysis will not
+ // see.
+ if (F.hasFnAttribute(Attribute::Naked)) {
+ MarkLive(F);
+ return;
+ }
+
+ unsigned RetCount = NumRetVals(&F);
+ // Assume all return values are dead
+ typedef SmallVector<Liveness, 5> RetVals;
+ RetVals RetValLiveness(RetCount, MaybeLive);
+
+ typedef SmallVector<UseVector, 5> RetUses;
+ // These vectors map each return value to the uses that make it MaybeLive, so
+ // we can add those to the Uses map if the return value really turns out to be
+ // MaybeLive. Initialized to a list of RetCount empty lists.
+ RetUses MaybeLiveRetUses(RetCount);
+
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+ != F.getFunctionType()->getReturnType()) {
+ // We don't support old style multiple return values.
+ MarkLive(F);
+ return;
+ }
+
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ MarkLive(F);
+ return;
+ }
+
+ DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ // Keep track of the number of live retvals, so we can skip checks once all
+ // of them turn out to be live.
+ unsigned NumLiveRetVals = 0;
+ // Loop all uses of the function.
+ for (const Use &U : F.uses()) {
+ // If the function is PASSED IN as an argument, its address has been
+ // taken.
+ ImmutableCallSite CS(U.getUser());
+ if (!CS || !CS.isCallee(&U)) {
+ MarkLive(F);
+ return;
+ }
+
+ // If this use is anything other than a call site, the function is alive.
+ const Instruction *TheCall = CS.getInstruction();
+ if (!TheCall) { // Not a direct call site?
+ MarkLive(F);
+ return;
+ }
+
+ // If we end up here, we are looking at a direct call to our function.
+
+ // Now, check how our return value(s) is/are used in this caller. Don't
+ // bother checking return values if all of them are live already.
+ if (NumLiveRetVals == RetCount)
+ continue;
+
+ // Check all uses of the return value.
+ for (const Use &U : TheCall->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
+ }
+ } else {
+        // Used by something other than extractvalue. Survey, but assume that the
+ // result applies to all sub-values.
+ UseVector MaybeLiveAggregateUses;
+ if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
+ NumLiveRetVals = RetCount;
+ RetValLiveness.assign(RetCount, Live);
+ break;
+ } else {
+ for (unsigned i = 0; i != RetCount; ++i) {
+ if (RetValLiveness[i] != Live)
+ MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
+ }
+ }
+ }
+ }
+ }
+
+ // Now we've inspected all callers, record the liveness of our return values.
+ for (unsigned i = 0; i != RetCount; ++i)
+ MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+ DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+ // Now, check all of our arguments.
+ unsigned i = 0;
+ UseVector MaybeLiveArgUses;
+ for (Function::const_arg_iterator AI = F.arg_begin(),
+ E = F.arg_end(); AI != E; ++AI, ++i) {
+ Liveness Result;
+ if (F.getFunctionType()->isVarArg()) {
+ // Variadic functions will already have a va_arg function expanded inside
+ // them, making them potentially very sensitive to ABI changes resulting
+ // from removing arguments entirely, so don't. For example AArch64 handles
+ // register and stack HFAs very differently, and this is reflected in the
+ // IR which has already been generated.
+ Result = Live;
+ } else {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Result = SurveyUses(&*AI, MaybeLiveArgUses);
+ }
+
+ // Mark the result.
+ MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ // Clear the vector again for the next iteration.
+ MaybeLiveArgUses.clear();
+ }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
+/// such that RA will be marked live if any use in MaybeLiveUses gets marked
+/// live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
+ switch (L) {
+ case Live: MarkLive(RA); break;
+ case MaybeLive:
+ {
+ // Note any uses of this value, so this return value can be
+ // marked live whenever one of the uses becomes live.
+ for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+ UE = MaybeLiveUses.end(); UI != UE; ++UI)
+ Uses.insert(std::make_pair(*UI, RA));
+ break;
+ }
+ }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally, mark any values that are used as this
+/// function's parameters or by its return values (according to Uses) live as
+/// well.
+void DAE::MarkLive(const Function &F) {
+ DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+ if (LiveFunctions.count(RA.F))
+ return; // Function was already marked Live.
+
+ if (!LiveValues.insert(RA).second)
+ return; // We were already marked Live.
+
+ DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate its liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+ // We don't use upper_bound (or equal_range) here, because our recursive call
+ // to ourselves is likely to cause the upper_bound (which is the first value
+ // not belonging to RA) to become erased and the iterator invalidated.
+ UseMap::iterator Begin = Uses.lower_bound(RA);
+ UseMap::iterator E = Uses.end();
+ UseMap::iterator I;
+ for (I = Begin; I != E && I->first == RA; ++I)
+ MarkLive(I->second);
+
+ // Erase RA from the Uses map (from the lower bound to wherever we ended up
+ // after the loop).
+ Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of the callees of
+// the function to not have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+ // Don't modify fully live functions
+ if (LiveFunctions.count(F))
+ return false;
+
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has fewer arguments and a different return type.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type*> Params;
+
+ // Keep track of if we have a live 'returned' argument
+ bool HasLiveReturnedArg = false;
+
+ // Set up to build a new list of parameter attributes.
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &PAL = F->getAttributes();
+
+ // Remember which arguments are still alive.
+ SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+ // Construct the new parameter list from non-dead arguments. Also construct
+ // a new set of parameter attributes to correspond. Skip the first parameter
+ // attribute, since that belongs to the return value.
+ unsigned i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++i) {
+ RetOrArg Arg = CreateArg(F, i);
+ if (LiveValues.erase(Arg)) {
+ Params.push_back(I->getType());
+ ArgAlive[i] = true;
+
+ // Get the original parameter attributes (skipping the first one, that is
+      // for the return value).
+ if (PAL.hasAttributes(i + 1)) {
+ AttrBuilder B(PAL, i + 1);
+ if (B.contains(Attribute::Returned))
+ HasLiveReturnedArg = true;
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
+ } else {
+ ++NumArgumentsEliminated;
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+ << ") from " << F->getName() << "\n");
+ }
+ }
+
+ // Find out the new return value.
+ Type *RetTy = FTy->getReturnType();
+ Type *NRetTy = nullptr;
+ unsigned RetCount = NumRetVals(F);
+
+ // -1 means unused, other numbers are the new index
+ SmallVector<int, 5> NewRetIdxs(RetCount, -1);
+ std::vector<Type*> RetTypes;
+
+ // If there is a function with a live 'returned' argument but a dead return
+ // value, then there are two possible actions:
+ // 1) Eliminate the return value and take off the 'returned' attribute on the
+ // argument.
+ // 2) Retain the 'returned' attribute and treat the return value (but not the
+ // entire function) as live so that it is not eliminated.
+ //
+ // It's not clear in the general case which option is more profitable because,
+ // even in the absence of explicit uses of the return value, code generation
+ // is free to use the 'returned' attribute to do things like eliding
+ // save/restores of registers across calls. Whether or not this happens is
+ // target and ABI-specific as well as depending on the amount of register
+ // pressure, so there's no good way for an IR-level pass to figure this out.
+ //
+ // Fortunately, the only places where 'returned' is currently generated by
+ // the FE are places where 'returned' is basically free and almost always a
+ // performance win, so the second option can just be used always for now.
+ //
+ // This should be revisited if 'returned' is ever applied more liberally.
+ if (RetTy->isVoidTy() || HasLiveReturnedArg) {
+ NRetTy = RetTy;
+ } else {
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(getRetComponentType(F, i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
+ } else {
+ ++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ if (RetTypes.size() > 1) {
+ // More than one return type? Reduce it down to size.
+ if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ } else {
+ assert(isa<ArrayType>(RetTy) && "unexpected multi-value return");
+ NRetTy = ArrayType::get(RetTypes[0], RetTypes.size());
+ }
+ } else if (RetTypes.size() == 1)
+ // One return type? Just a simple value then, but only if we didn't use to
+ // return a struct with that simple value before.
+ NRetTy = RetTypes.front();
+ else if (RetTypes.size() == 0)
+ // No return types? Make it void, but only if we didn't use to return {}.
+ NRetTy = Type::getVoidTy(F->getContext());
+ }
+
+ assert(NRetTy && "No new return type found?");
+
+ // The existing function return attributes.
+ AttributeSet RAttrs = PAL.getRetAttributes();
+
+ // Remove any incompatible attributes, but only if we removed all return
+ // values. Otherwise, ensure that we don't have any conflicting attributes
+ // here. Currently, this should not be possible, but special handling might be
+ // required when new return value attributes are added.
+ if (NRetTy->isVoidTy())
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NRetTy));
+ else
+ assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
+ "Return attributes no longer compatible?");
+
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
+
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getFnAttributes()));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+
+ // Create the new function type based on the recomputed parameters.
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+ // No change?
+ if (NFTy == FTy)
+ return false;
+
+ // Create the new function body and insert it into the module...
+ Function *NF = Function::Create(NFTy, F->getLinkage());
+ NF->copyAttributesFrom(F);
+ NF->setAttributes(NewPAL);
+ // Insert the new function before the old function, so we won't be processing
+ // it again.
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+ NF->takeName(F);
+
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in a smaller number of arguments into the new function.
+ //
+ std::vector<Value*> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->user_back());
+ Instruction *Call = CS.getInstruction();
+
+ AttributesVec.clear();
+ const AttributeSet &CallPAL = CS.getAttributes();
+
+ // The call return attributes.
+ AttributeSet RAttrs = CallPAL.getRetAttributes();
+
+ // Adjust in case the function was changed to return void.
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NF->getReturnType()));
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+
+ // Declare these outside of the loops, so we can reuse them for the second
+    // loop, which loops over the varargs.
+ CallSite::arg_iterator I = CS.arg_begin();
+ unsigned i = 0;
+ // Loop over those operands, corresponding to the normal arguments to the
+ // original function, and add those that are still alive.
+ for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+ if (ArgAlive[i]) {
+ Args.push_back(*I);
+ // Get original parameter attributes, but skip return attributes.
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ // If the return type has changed, then get rid of 'returned' on the
+ // call site. The alternative is to make all 'returned' attributes on
+ // call sites keep the return value alive just like 'returned'
+          // attributes on function declarations, but that is less clearly a
+          // win, and this is not an expected case anyway.
+ if (NRetTy != RetTy && B.contains(Attribute::Returned))
+ B.removeAttribute(Attribute::Returned);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ }
+
+ // Push any varargs arguments on the list. Don't forget their attributes.
+ for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ Args.push_back(*I);
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
+ }
+
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
+
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
+
+ Instruction *New;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, "", Call->getParent());
+ cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ } else {
+ New = CallInst::Create(NF, Args, "", Call);
+ cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+ cast<CallInst>(New)->setAttributes(NewCallPAL);
+ if (cast<CallInst>(Call)->isTailCall())
+ cast<CallInst>(New)->setTailCall();
+ }
+ New->setDebugLoc(Call->getDebugLoc());
+
+ Args.clear();
+
+ if (!Call->use_empty()) {
+ if (New->getType() == Call->getType()) {
+ // Return type not changed? Just replace users then.
+ Call->replaceAllUsesWith(New);
+ New->takeName(Call);
+ } else if (New->getType()->isVoidTy()) {
+ // Our return value has uses, but they will get removed later on.
+ // Replace by null for now.
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ } else {
+ assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
+ "Return type changed, but not into a void. The old return type"
+ " must have been a struct or an array!");
+ Instruction *InsertPt = Call;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest());
+ InsertPt = &*NewEdge->getFirstInsertionPt();
+ }
+
+ // We used to return a struct or array. Instead of doing smart stuff
+ // with all the uses, we will just rebuild it using extract/insertvalue
+ // chaining and let instcombine clean that up.
+ //
+ // Start out building up our return value from undef
+ Value *RetVal = UndefValue::get(RetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ Value *V;
+ if (RetTypes.size() > 1)
+ // We are still returning a struct, so extract the value from our
+ // return value
+ V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
+ InsertPt);
+ else
+ // We are now returning a single element, so just insert that
+ V = New;
+ // Insert the value at the old position
+ RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ }
+ // Now, replace all uses of the old call instruction with the return
+ // struct we built
+ Call->replaceAllUsesWith(RetVal);
+ New->takeName(Call);
+ }
+ }
+
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
+
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments and transferring the names over as well.
+ i = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin(); I != E; ++I, ++i)
+ if (ArgAlive[i]) {
+ // If this is a live argument, move the name and users over to the new
+ // version.
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
+ ++I2;
+ } else {
+ // If this argument is dead, replace any uses of it with null constants
+ // (these are guaranteed to become unused later on).
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ }
+
+ // If we change the return value of the function we must rewrite any return
+ // instructions. Check this now.
+ if (F->getReturnType() != NF->getReturnType())
+ for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ Value *RetVal;
+
+ if (NFTy->getReturnType()->isVoidTy()) {
+ RetVal = nullptr;
+ } else {
+ assert(RetTy->isStructTy() || RetTy->isArrayTy());
+ // The original return value was a struct or array, insert
+ // extractvalue/insertvalue chains to extract only the values we need
+ // to return and insert them into our new result.
+          // This does generate messy code, but we'll leave it to instcombine
+          // to clean that up.
+ Value *OldRet = RI->getOperand(0);
+ // Start out building up our return value from undef
+ RetVal = UndefValue::get(NRetTy);
+ for (unsigned i = 0; i != RetCount; ++i)
+ if (NewRetIdxs[i] != -1) {
+ ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+ "oldret", RI);
+ if (RetTypes.size() > 1) {
+ // We're still returning a struct, so reinsert the value into
+ // our new return value at the new index
+
+ RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+ "newret", RI);
+ } else {
+ // We are now only returning a simple value, so just return the
+ // extracted value.
+ RetVal = EV;
+ }
+ }
+ }
+ // Replace the return instruction with one returning the new return
+ // value (possibly 0 if we became void).
+ ReturnInst::Create(F->getContext(), RetVal, RI);
+ BB->getInstList().erase(RI);
+ }
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NF->setSubprogram(F->getSubprogram());
+
+ // Now that the old function is dead, delete it.
+ F->eraseFromParent();
+
+ return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // First pass: Do a simple check to see if any functions can have their "..."
+ // removed. We can do this if they never call va_start. This loop cannot be
+ // fused with the next loop, because deleting a function invalidates
+ // information computed while surveying other functions.
+ DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function &F = *I++;
+ if (F.getFunctionType()->isVarArg())
+ Changed |= DeleteDeadVarargs(F);
+ }
+
+  // Second phase: loop through the module, determining which arguments are live.
+ // We assume all arguments are dead unless proven otherwise (allowing us to
+ // determine that dead arguments passed into recursive functions are dead).
+ //
+ DEBUG(dbgs() << "DAE - Determining liveness\n");
+ for (auto &F : M)
+ SurveyFunction(F);
+
+ // Now, remove all dead arguments and return values from each function in
+ // turn.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.
+ // replaced by a new one).
+ Function *F = &*I++;
+ Changed |= RemoveDeadStuffFromFunction(F);
+ }
+
+ // Finally, look for any unused parameters in functions with non-local
+ // linkage and replace the passed in parameters with undef.
+ for (auto &F : M)
+ Changed |= RemoveDeadArgumentsFromCallers(F);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
new file mode 100644
index 0000000..af313a6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -0,0 +1,84 @@
+//===-- ElimAvailExtern.cpp - Eliminate available_externally globals ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate available external global
+// definitions from the program, turning them into declarations.
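+//
+// For example (an illustrative sketch), the definitions
+//
+//   @g = available_externally global i32 42
+//   define available_externally i32 @f() { ... }
+//
+// become, after this pass:
+//
+//   @g = external global i32
+//   declare i32 @f()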
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "elim-avail-extern"
+
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+struct EliminateAvailableExternally : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ EliminateAvailableExternally() : ModulePass(ID) {
+ initializeEliminateAvailableExternallyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+  // run - Run the EliminateAvailableExternally pass on the specified module.
+ //
+ bool runOnModule(Module &M) override;
+};
+}
+
+char EliminateAvailableExternally::ID = 0;
+INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern",
+ "Eliminate Available Externally Globals", false, false)
+
+ModulePass *llvm::createEliminateAvailableExternallyPass() {
+ return new EliminateAvailableExternally();
+}
+
+bool EliminateAvailableExternally::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Drop initializers of available externally global variables.
+ for (GlobalVariable &GV : M.globals()) {
+ if (!GV.hasAvailableExternallyLinkage())
+ continue;
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
+ if (isSafeToDestroyConstant(Init))
+ Init->destroyConstant();
+ }
+ GV.removeDeadConstantUsers();
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ NumVariables++;
+ Changed = true;
+ }
+
+ // Drop the bodies of available externally functions.
+ for (Function &F : M) {
+ if (!F.hasAvailableExternallyLinkage())
+ continue;
+ if (!F.isDeclaration())
+ // This will set the linkage to external
+ F.deleteBody();
+ F.removeDeadConstantUsers();
+ NumFunctions++;
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 0000000..1a3b925
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,160 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+using namespace llvm;
+
+/// Make sure GV is visible from both modules. Delete is true if it is
+/// being deleted from this module.
+/// This also makes sure GV cannot be dropped so that references from
+/// the split module remain valid.
+static void makeVisible(GlobalValue &GV, bool Delete) {
+ bool Local = GV.hasLocalLinkage();
+ if (Local || Delete) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ if (Local)
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+ return;
+ }
+
+ if (!GV.hasLinkOnceLinkage()) {
+ assert(!GV.isDiscardableIfUnused());
+ return;
+ }
+
+ // Map linkonce* to weak* so that llvm doesn't drop this GV.
+ switch(GV.getLinkage()) {
+ default:
+ llvm_unreachable("Unexpected linkage");
+ case GlobalValue::LinkOnceAnyLinkage:
+ GV.setLinkage(GlobalValue::WeakAnyLinkage);
+ return;
+ case GlobalValue::LinkOnceODRLinkage:
+ GV.setLinkage(GlobalValue::WeakODRLinkage);
+ return;
+ }
+}
+
+namespace {
+ /// @brief A pass to extract specific functions and their dependencies.
+ class GVExtractorPass : public ModulePass {
+ SetVector<GlobalValue *> Named;
+ bool deleteStuff;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the
+    /// specified global values. Otherwise, it deletes as much of the module
+    /// as possible, except for the global values specified.
+ ///
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+ bool runOnModule(Module &M) override {
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ bool Delete =
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+ if (!Delete) {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ if (I->getName() == "llvm.global_ctors")
+ continue;
+ }
+
+ makeVisible(*I, Delete);
+
+ if (Delete) {
+        // Make this a declaration and drop its comdat.
+ I->setInitializer(nullptr);
+ I->setComdat(nullptr);
+ }
+ }
+
+ // Visit the Functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ bool Delete =
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+ if (!Delete) {
+ if (I->hasAvailableExternallyLinkage())
+ continue;
+ }
+
+ makeVisible(*I, Delete);
+
+ if (Delete) {
+        // Make this a declaration and drop its comdat.
+ I->deleteBody();
+ I->setComdat(nullptr);
+ }
+ }
+
+ // Visit the Aliases.
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ Module::alias_iterator CurI = I;
+ ++I;
+
+ bool Delete = deleteStuff == (bool)Named.count(&*CurI);
+ makeVisible(*CurI, Delete);
+
+ if (Delete) {
+ Type *Ty = CurI->getType()->getElementType();
+
+ CurI->removeFromParent();
+ llvm::Value *Declaration;
+ if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+ Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ CurI->getName(), &M);
+
+ } else {
+ Declaration =
+ new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, CurI->getName());
+
+ }
+ CurI->replaceAllUsesWith(Declaration);
+ delete &*CurI;
+ }
+ }
+
+ return true;
+ }
+ };
+
+ char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
new file mode 100644
index 0000000..816291d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -0,0 +1,121 @@
+//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "forceattrs"
+
+static cl::list<std::string>
+ ForceAttributes("force-attribute", cl::Hidden,
+ cl::desc("Add an attribute to a function. This should be a "
+ "pair of 'function-name:attribute-name', for "
+ "example -force-add-attribute=foo:noinline. This "
+ "option can be specified multiple times."));
+
+static Attribute::AttrKind parseAttrKind(StringRef Kind) {
+ return StringSwitch<Attribute::AttrKind>(Kind)
+ .Case("alwaysinline", Attribute::AlwaysInline)
+ .Case("builtin", Attribute::Builtin)
+ .Case("cold", Attribute::Cold)
+ .Case("convergent", Attribute::Convergent)
+ .Case("inlinehint", Attribute::InlineHint)
+ .Case("jumptable", Attribute::JumpTable)
+ .Case("minsize", Attribute::MinSize)
+ .Case("naked", Attribute::Naked)
+ .Case("nobuiltin", Attribute::NoBuiltin)
+ .Case("noduplicate", Attribute::NoDuplicate)
+ .Case("noimplicitfloat", Attribute::NoImplicitFloat)
+ .Case("noinline", Attribute::NoInline)
+ .Case("nonlazybind", Attribute::NonLazyBind)
+ .Case("noredzone", Attribute::NoRedZone)
+ .Case("noreturn", Attribute::NoReturn)
+ .Case("norecurse", Attribute::NoRecurse)
+ .Case("nounwind", Attribute::NoUnwind)
+ .Case("optnone", Attribute::OptimizeNone)
+ .Case("optsize", Attribute::OptimizeForSize)
+ .Case("readnone", Attribute::ReadNone)
+ .Case("readonly", Attribute::ReadOnly)
+ .Case("argmemonly", Attribute::ArgMemOnly)
+ .Case("returns_twice", Attribute::ReturnsTwice)
+ .Case("safestack", Attribute::SafeStack)
+ .Case("sanitize_address", Attribute::SanitizeAddress)
+ .Case("sanitize_memory", Attribute::SanitizeMemory)
+ .Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("ssp", Attribute::StackProtect)
+ .Case("sspreq", Attribute::StackProtectReq)
+ .Case("sspstrong", Attribute::StackProtectStrong)
+ .Case("uwtable", Attribute::UWTable)
+ .Default(Attribute::None);
+}
+
+/// If F has any forced attributes given on the command line, add them.
+static void addForcedAttributes(Function &F) {
+ for (auto &S : ForceAttributes) {
+ auto KV = StringRef(S).split(':');
+ if (KV.first != F.getName())
+ continue;
+
+ auto Kind = parseAttrKind(KV.second);
+ if (Kind == Attribute::None) {
+ DEBUG(dbgs() << "ForcedAttribute: " << KV.second
+ << " unknown or not handled!\n");
+ continue;
+ }
+ if (F.hasFnAttribute(Kind))
+ continue;
+ F.addFnAttr(Kind);
+ }
+}
+
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+ if (ForceAttributes.empty())
+ return PreservedAnalyses::all();
+
+ for (Function &F : M.functions())
+ addForcedAttributes(F);
+
+  // Just conservatively invalidate analyses; this isn't likely to be important.
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct ForceFunctionAttrsLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeForceFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (ForceAttributes.empty())
+ return false;
+
+ for (Function &F : M.functions())
+ addForcedAttributes(F);
+
+ // Conservatively assume we changed something.
+ return true;
+ }
+};
+}
+
+char ForceFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
+ "Force set function attributes", false, false)
+
+Pass *llvm::createForceFunctionAttrsLegacyPass() {
+ return new ForceFunctionAttrsLegacyPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 0000000..6dcfb3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,1058 @@
+//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. It does the
+// same with function arguments independently, marking them readonly/
+// readnone/nocapture. Finally, well-known library call declarations
+// are marked with all attributes that are consistent with the
+// function's standard definition. This pass is implemented as a
+// bottom-up traversal of the call-graph.
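+//
+// For example (an illustrative sketch), the pass can deduce:
+//
+//   define i32 @sum(i32 %a, i32 %b) {    ; becomes readnone
+//     %s = add i32 %a, %b
+//     ret i32 %s
+//   }
+//
+//   define i32 @peek(i32* %p) {          ; becomes readonly, %p nocapture
+//     %v = load i32, i32* %p
+//     ret i32 %v
+//   }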
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "functionattrs"
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
+STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
+
+namespace {
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
+}
+
+namespace {
+struct FunctionAttrs : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ FunctionAttrs() : CallGraphSCCPass(ID) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ bool doInitialization(CallGraph &CG) override {
+ Revisit.clear();
+ return false;
+ }
+ bool doFinalization(CallGraph &CG) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+private:
+ TargetLibraryInfo *TLI;
+ SmallVector<WeakVH,16> Revisit;
+};
+}
+
+char FunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+
+Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+
+namespace {
+/// The three kinds of memory access relevant to 'readonly' and
+/// 'readnone' attributes.
+enum MemoryAccessKind {
+ MAK_ReadNone = 0,
+ MAK_ReadOnly = 1,
+ MAK_MayWrite = 2
+};
+}
+
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
+ if (MRB == FMRB_DoesNotAccessMemory)
+ // Already perfect!
+ return MAK_ReadNone;
+
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that writes memory, so treat them like declarations.
+ if (F.isDeclaration() || F.mayBeOverridden()) {
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return MAK_ReadOnly;
+
+ // Conservatively assume it writes to memory.
+ return MAK_MayWrite;
+ }
+
+ // Scan the function body for instructions that may read or write memory.
+ bool ReadsMemory = false;
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+
+ // Some instructions can be ignored even if they read or write memory.
+ // Detect these now, skipping to the next instruction if one is found.
+ CallSite CS(cast<Value>(I));
+ if (CS) {
+ // Ignore calls to functions in the same SCC.
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ continue;
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
+
+ // If the call doesn't access memory, we're done.
+ if (!(MRB & MRI_ModRef))
+ continue;
+
+ if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & MRI_Mod)
+ return MAK_MayWrite;
+ // If it reads, note it.
+ if (MRB & MRI_Ref)
+ ReadsMemory = true;
+ continue;
+ }
+
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (!Arg->getType()->isPtrOrPtrVectorTy())
+ continue;
+
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+ MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
+
+ // Skip accesses to local or constant memory as they don't impact the
+ // externally visible mod/ref behavior.
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+
+ if (MRB & MRI_Mod)
+ // Writes non-local memory. Give up.
+ return MAK_MayWrite;
+ if (MRB & MRI_Ref)
+ // Ok, it reads non-local memory.
+ ReadsMemory = true;
+ }
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
+ if (!LI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(LI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
+ if (!SI->isVolatile()) {
+ MemoryLocation Loc = MemoryLocation::get(SI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ MemoryLocation Loc = MemoryLocation::get(VI);
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+
+ // Any remaining instructions need to be taken seriously! Check if they
+ // read or write memory.
+ if (I->mayWriteToMemory())
+ // Writes memory. Just give up.
+ return MAK_MayWrite;
+
+ // If this instruction may read memory, remember that.
+ ReadsMemory |= I->mayReadFromMemory();
+ }
+
+ return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+}
+
+/// Deduce readonly/readnone attributes for the SCC.
+template <typename AARGetterT>
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+ // Check if any of the functions in the SCC read or write memory. If they
+ // write memory then they can't be marked readnone or readonly.
+ bool ReadsMemory = false;
+ for (Function *F : SCCNodes) {
+ // Call the callable parameter to look up AA results for this function.
+ AAResults &AAR = AARGetter(*F);
+
+ switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ case MAK_MayWrite:
+ return false;
+ case MAK_ReadOnly:
+ ReadsMemory = true;
+ break;
+ case MAK_ReadNone:
+ // Nothing to do!
+ break;
+ }
+ }
+
+ // Success! Functions in this SCC do not access memory, or only read memory.
+ // Give them the appropriate attribute.
+ bool MadeChange = false;
+ for (Function *F : SCCNodes) {
+ if (F->doesNotAccessMemory())
+ // Already perfect!
+ continue;
+
+ if (F->onlyReadsMemory() && ReadsMemory)
+ // No change.
+ continue;
+
+ MadeChange = true;
+
+ // Clear out any existing attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ F->removeAttributes(
+ AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
+
+ // Add in the new attribute.
+ F->addAttribute(AttributeSet::FunctionIndex,
+ ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+
+ if (ReadsMemory)
+ ++NumReadOnly;
+ else
+ ++NumReadNone;
+ }
+
+ return MadeChange;
+}
+
+namespace {
+/// For a given pointer Argument, this retains a list of Arguments of functions
+/// in the same SCC that the pointer data flows into. We use this to build an
+/// SCC of the arguments.
+struct ArgumentGraphNode {
+ Argument *Definition;
+ SmallVector<ArgumentGraphNode *, 4> Uses;
+};
+
+class ArgumentGraph {
+  // We store pointers to ArgumentGraphNode objects, so it's important that
+  // they not move around upon insert.
+ typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+public:
+ ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
+
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); }
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) {
+ ArgumentGraphNode &Node = ArgumentMap[A];
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node);
+ return &Node;
+ }
+};
+
+/// This tracker checks whether callees are in the SCC, and if so it does not
+/// consider that a capture, instead adding it to the "Uses" list and
+/// continuing with the analysis.
+struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SCCNodeSet &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
+
+ void tooManyUses() override { Captured = true; }
+
+ bool captured(const Use *U) override {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) {
+ Captured = true;
+ return true;
+ }
+
+ Function *F = CS.getCalledFunction();
+ if (!F || F->isDeclaration() || F->mayBeOverridden() ||
+ !SCCNodes.count(F)) {
+ Captured = true;
+ return true;
+ }
+
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account for
+ // these.
+
+ unsigned UseIndex =
+ std::distance(const_cast<const Use *>(CS.arg_begin()), U);
+
+ assert(UseIndex < CS.data_operands_size() &&
+ "Indirect function calls should have been filtered above!");
+
+ if (UseIndex >= CS.getNumArgOperands()) {
+      // Data operand, but not an argument operand -- must be a bundle operand
+ assert(CS.hasOperandBundles() && "Must be!");
+
+ // CaptureTracking told us that we're being captured by an operand bundle
+ // use. In this case it does not matter if the callee is within our SCC
+ // or not -- we've been captured in some unknown way, and we have to be
+ // conservative.
+ Captured = true;
+ return true;
+ }
+
+ if (UseIndex >= F->arg_size()) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
+ }
+
+ Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
+ return false;
+ }
+
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument *, 4> Uses; // Uses within our SCC.
+
+ const SCCNodeSet &SCCNodes;
+};
+}
+
+namespace llvm {
+template <> struct GraphTraits<ArgumentGraphNode *> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+};
+template <>
+struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
+};
+}
+
+/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
+static Attribute::AttrKind
+determinePointerReadAttrs(Argument *A,
+ const SmallPtrSet<Argument *, 8> &SCCNodes) {
+
+ SmallVector<Use *, 32> Worklist;
+ SmallSet<Use *, 32> Visited;
+
+ // inalloca arguments are always clobbered by the call.
+ if (A->hasInAllocaAttr())
+ return Attribute::None;
+
+ bool IsRead = false;
+ // We don't need to track IsWritten. If A is written to, return immediately.
+
+ for (Use &U : A->uses()) {
+ Visited.insert(&U);
+ Worklist.push_back(&U);
+ }
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ // The original value is not read/written via this if the new value isn't.
+ for (Use &UU : I->uses())
+ if (Visited.insert(&UU).second)
+ Worklist.push_back(&UU);
+ break;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ bool Captures = true;
+
+ if (I->getType()->isVoidTy())
+ Captures = false;
+
+ auto AddUsersToWorklistIfCapturing = [&] {
+ if (Captures)
+ for (Use &UU : I->uses())
+ if (Visited.insert(&UU).second)
+ Worklist.push_back(&UU);
+ };
+
+ CallSite CS(I);
+ if (CS.doesNotAccessMemory()) {
+ AddUsersToWorklistIfCapturing();
+ continue;
+ }
+
+ Function *F = CS.getCalledFunction();
+ if (!F) {
+ if (CS.onlyReadsMemory()) {
+ IsRead = true;
+ AddUsersToWorklistIfCapturing();
+ continue;
+ }
+ return Attribute::None;
+ }
+
+ // Note: the callee and the two successor blocks *follow* the argument
+ // operands. This means there is no need to adjust UseIndex to account
+ // for these.
+
+ unsigned UseIndex = std::distance(CS.arg_begin(), U);
+
+ // U cannot be the callee operand use: since we're exploring the
+ // transitive uses of an Argument, having such a use be a callee would
+ // imply the CallSite is an indirect call or invoke; and we'd take the
+ // early exit above.
+ assert(UseIndex < CS.data_operands_size() &&
+ "Data operand use expected!");
+
+ bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands();
+
+ if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ return Attribute::None;
+ }
+
+ Captures &= !CS.doesNotCapture(UseIndex);
+
+      // Since the optimizer (by design) cannot see the data flow corresponding
+      // to an operand bundle use, these cannot participate in the optimistic
+      // SCC analysis. Instead, we model the operand bundle uses as arguments
+      // in a call to a function external to the SCC.
+ if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) ||
+ IsOperandBundleUse) {
+
+ // The accessors used on CallSite here do the right thing for calls and
+ // invokes with operand bundles.
+
+ if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex))
+ return Attribute::None;
+ if (!CS.doesNotAccessMemory(UseIndex))
+ IsRead = true;
+ }
+
+ AddUsersToWorklistIfCapturing();
+ break;
+ }
+
+ case Instruction::Load:
+ IsRead = true;
+ break;
+
+ case Instruction::ICmp:
+ case Instruction::Ret:
+ break;
+
+ default:
+ return Attribute::None;
+ }
+ }
+
+ return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
+}
+
+/// Deduce nocapture attributes for the SCC.
+static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
+ bool Changed = false;
+
+ ArgumentGraph AG;
+
+ AttrBuilder B;
+ B.addAttribute(Attribute::NoCapture);
+
+ // Check each function in turn, determining which pointer arguments are not
+ // captured.
+ for (Function *F : SCCNodes) {
+ // Definitions with weak linkage may be overridden at linktime with
+ // something that captures pointers, so treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ continue;
+
+ // Functions that are readonly (or readnone) and nounwind and don't return
+ // a value can't capture arguments. Don't analyze them.
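+    // (Such a function cannot stash the pointer anywhere: it cannot write it
+    // to memory, cannot leak it through an exception, and has no return value
+    // to smuggle it out in.)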
+ if (F->onlyReadsMemory() && F->doesNotThrow() &&
+ F->getReturnType()->isVoidTy()) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+ continue;
+ }
+
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
+ ++A) {
+ if (!A->getType()->isPointerTy())
+ continue;
+ bool HasNonLocalUses = false;
+ if (!A->hasNoCaptureAttr()) {
+ ArgumentUsesTracker Tracker(SCCNodes);
+ PointerMayBeCaptured(&*A, &Tracker);
+ if (!Tracker.Captured) {
+ if (Tracker.Uses.empty()) {
+ // If it's trivially not captured, mark it nocapture now.
+ A->addAttr(
+ AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ ++NumNoCapture;
+ Changed = true;
+ } else {
+ // If it's not trivially captured and not trivially not captured,
+ // then it must be calling into another function in our SCC. Save
+ // its particulars for Argument-SCC analysis later.
+ ArgumentGraphNode *Node = AG[&*A];
+ for (SmallVectorImpl<Argument *>::iterator
+ UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end();
+ UI != UE; ++UI) {
+ Node->Uses.push_back(AG[*UI]);
+ if (*UI != A)
+ HasNonLocalUses = true;
+ }
+ }
+ }
+ // Otherwise, it's captured. Don't bother doing SCC analysis on it.
+ }
+ if (!HasNonLocalUses && !A->onlyReadsMemory()) {
+ // Can we determine that it's readonly/readnone without doing an SCC?
+ // Note that we don't allow any calls at all here, or else our result
+ // will be dependent on the iteration order through the functions in the
+ // SCC.
+ SmallPtrSet<Argument *, 8> Self;
+ Self.insert(&*A);
+ Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
+ if (R != Attribute::None) {
+ AttrBuilder B;
+ B.addAttribute(R);
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ Changed = true;
+ R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ }
+ }
+ }
+ }
+
+ // The graph we've collected is partial because we stopped scanning for
+ // argument uses once we solved the argument trivially. These partial nodes
+ // show up as ArgumentGraphNode objects with an empty Uses list, and for
+ // these nodes the final decision about whether they capture has already been
+ // made. If the definition doesn't have a 'nocapture' attribute by now, it
+ // captures.
+
+ for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
+ const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
+ if (ArgumentSCC.size() == 1) {
+ if (!ArgumentSCC[0]->Definition)
+ continue; // synthetic root node
+
+ // eg. "void f(int* x) { if (...) f(x); }"
+ if (ArgumentSCC[0]->Uses.size() == 1 &&
+ ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
+ Argument *A = ArgumentSCC[0]->Definition;
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ ++NumNoCapture;
+ Changed = true;
+ }
+ continue;
+ }
+
+ bool SCCCaptured = false;
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+ I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *Node = *I;
+ if (Node->Uses.empty()) {
+ if (!Node->Definition->hasNoCaptureAttr())
+ SCCCaptured = true;
+ }
+ }
+ if (SCCCaptured)
+ continue;
+
+ SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
+ // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
+ // quickly looking up whether a given Argument is in this ArgumentSCC.
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
+ ArgumentSCCNodes.insert((*I)->Definition);
+ }
+
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+ I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *N = *I;
+ for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end();
+ UI != UE; ++UI) {
+ Argument *A = (*UI)->Definition;
+ if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
+ continue;
+ SCCCaptured = true;
+ break;
+ }
+ }
+ if (SCCCaptured)
+ continue;
+
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ ++NumNoCapture;
+ Changed = true;
+ }
+
+ // We also want to compute readonly/readnone. With a small number of false
+ // negatives, we can assume that any pointer which is captured isn't going
+ // to be provably readonly or readnone, since by definition we can't
+ // analyze all uses of a captured pointer.
+ //
+    // The false negatives happen when the pointer is captured by a function
+    // that promises readonly/readnone behaviour on the pointer, and the
+    // pointer's lifetime then ends before anything writes to arbitrary memory.
+ // Also, a readonly/readnone pointer may be returned, but returning a
+ // pointer is capturing it.
+
+ Attribute::AttrKind ReadAttr = Attribute::ReadNone;
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes);
+ if (K == Attribute::ReadNone)
+ continue;
+ if (K == Attribute::ReadOnly) {
+ ReadAttr = Attribute::ReadOnly;
+ continue;
+ }
+ ReadAttr = K;
+ break;
+ }
+
+ if (ReadAttr != Attribute::None) {
+ AttrBuilder B, R;
+ B.addAttribute(ReadAttr);
+ R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ // Clear out existing readonly/readnone attributes
+ A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// Tests whether a function is "malloc-like".
+///
+/// A function is "malloc-like" if it returns either null or a pointer that
+/// doesn't alias any other pointer visible to the caller.
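+///
+/// Illustrative IR (assuming @malloc is declared with a 'noalias' return):
+///
+///   define i8* @m(i64 %n) {
+///     %p = call i8* @malloc(i64 %n)
+///     ret i8* %p
+///   }
+///
+/// Every value flowing to the return is a fresh allocation, so @m itself can
+/// be marked as returning a 'noalias' pointer.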
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
+ SmallSetVector<Value *, 8> FlowsToReturn;
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i];
+
+ if (Constant *C = dyn_cast<Constant>(RetVal)) {
+ if (!C->isNullValue() && !isa<UndefValue>(C))
+ return false;
+
+ continue;
+ }
+
+ if (isa<Argument>(RetVal))
+ return false;
+
+ if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
+ continue;
+ }
+
+ // Check whether the pointer came from an allocation.
+ case Instruction::Alloca:
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
+ break;
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ break;
+ } // fall-through
+ default:
+ return false; // Did not come from an allocation.
+ }
+
+ if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
+ return false;
+ }
+
+ return true;
+}
+
+/// Deduce noalias attributes for the SCC.
+static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
+ // Check each function in turn, determining which functions return noalias
+ // pointers.
+ for (Function *F : SCCNodes) {
+ // Already noalias.
+ if (F->doesNotAlias(0))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate noalias return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
+
+ if (!isFunctionMallocLike(F, SCCNodes))
+ return false;
+ }
+
+ bool MadeChange = false;
+ for (Function *F : SCCNodes) {
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ continue;
+
+ F->setDoesNotAlias(0);
+ ++NumNoAlias;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// Tests whether this function is known to not return null.
+///
+/// Requires that the function returns a pointer.
+///
+/// Returns true if it believes the function will not return null, and sets
+/// \p Speculative based on whether the returned conclusion is a speculative
+/// conclusion due to SCC calls.
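+///
+/// Illustrative example: a function whose return values are all addresses of
+/// globals is proven nonnull directly (such values are locally known to be
+/// non-null); one that returns the result of a call into its own SCC is
+/// handled speculatively, and the speculation stands unless some function in
+/// the SCC is found that can return null.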
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI, bool &Speculative) {
+ assert(F->getReturnType()->isPointerTy() &&
+ "nonnull only meaningful on pointer types");
+ Speculative = false;
+
+ SmallSetVector<Value *, 8> FlowsToReturn;
+ for (BasicBlock &BB : *F)
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
+ FlowsToReturn.insert(Ret->getReturnValue());
+
+ for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+ Value *RetVal = FlowsToReturn[i];
+
+ // If this value is locally known to be non-null, we're good
+ if (isKnownNonNull(RetVal, &TLI))
+ continue;
+
+ // Otherwise, we need to look upwards since we can't make any local
+ // conclusions.
+ Instruction *RVI = dyn_cast<Instruction>(RetVal);
+ if (!RVI)
+ return false;
+ switch (RVI->getOpcode()) {
+ // Extend the analysis by looking upwards.
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ FlowsToReturn.insert(RVI->getOperand(0));
+ continue;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(RVI);
+ FlowsToReturn.insert(SI->getTrueValue());
+ FlowsToReturn.insert(SI->getFalseValue());
+ continue;
+ }
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(RVI);
+ for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ FlowsToReturn.insert(PN->getIncomingValue(i));
+ continue;
+ }
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(RVI);
+ Function *Callee = CS.getCalledFunction();
+ // A call to a node within the SCC is assumed to return null until
+ // proven otherwise
+ if (Callee && SCCNodes.count(Callee)) {
+ Speculative = true;
+ continue;
+ }
+ return false;
+ }
+ default:
+ return false; // Unknown source, may be null
+    }
+ llvm_unreachable("should have either continued or returned");
+ }
+
+ return true;
+}
+
+/// Deduce nonnull attributes for the SCC.
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ const TargetLibraryInfo &TLI) {
+  // Speculate that all functions in the SCC return only nonnull
+ // pointers. We may refute this as we analyze functions.
+ bool SCCReturnsNonNull = true;
+
+ bool MadeChange = false;
+
+ // Check each function in turn, determining which functions return nonnull
+ // pointers.
+ for (Function *F : SCCNodes) {
+ // Already nonnull.
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull))
+ continue;
+
+ // Definitions with weak linkage may be overridden at linktime, so
+ // treat them like declarations.
+ if (F->isDeclaration() || F->mayBeOverridden())
+ return false;
+
+ // We annotate nonnull return values, which are only applicable to
+ // pointer types.
+ if (!F->getReturnType()->isPointerTy())
+ continue;
+
+ bool Speculative = false;
+ if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+ if (!Speculative) {
+ // Mark the function eagerly since we may discover a function
+ // which prevents us from speculating about the entire SCC
+ DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
+ continue;
+ }
+    // At least one function returns something which could be null; we can't
+    // speculate any more.
+ SCCReturnsNonNull = false;
+ }
+
+ if (SCCReturnsNonNull) {
+ for (Function *F : SCCNodes) {
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::NonNull) ||
+ !F->getReturnType()->isPointerTy())
+ continue;
+
+ DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ ++NumNonNullReturn;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+static bool setDoesNotRecurse(Function &F) {
+ if (F.doesNotRecurse())
+ return false;
+ F.setDoesNotRecurse();
+ ++NumNoRecurse;
+ return true;
+}
+
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+ SmallVectorImpl<WeakVH> &Revisit) {
+ // Try and identify functions that do not recurse.
+
+ // If the SCC contains multiple nodes we know for sure there is recursion.
+ if (!SCC.isSingular())
+ return false;
+
+ const CallGraphNode *CGN = *SCC.begin();
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration() || F->doesNotRecurse())
+ return false;
+
+  // If all of the calls in F are identifiable and are to norecurse functions,
+  // F is norecurse. This check also detects self-recursion, as F is not
+  // currently marked norecurse, so any call from F to F will not be marked
+  // norecurse.
+ if (std::all_of(CGN->begin(), CGN->end(),
+ [](const CallGraphNode::CallRecord &CR) {
+ Function *F = CR.second->getFunction();
+ return F && F->doesNotRecurse();
+ }))
+    // All calls in F are to norecurse functions, so F cannot recurse.
+ return setDoesNotRecurse(*F);
+
+ // We know that F is not obviously recursive, but we haven't been able to
+ // prove that it doesn't actually recurse. Add it to the Revisit list to try
+ // again top-down later.
+ Revisit.push_back(F);
+ return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+ // If F is internal and all uses are in norecurse functions, then F is also
+ // norecurse.
+ if (F->doesNotRecurse())
+ return false;
+ if (F->hasInternalLinkage()) {
+ for (auto *U : F->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!I->getParent()->getParent()->doesNotRecurse())
+ return false;
+ } else {
+ return false;
+ }
+ return setDoesNotRecurse(*F);
+ }
+ return false;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ bool Changed = false;
+
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+ // whether a given CallGraphNode is in this SCC. Also track whether there are
+ // any external or opt-none nodes that will prevent us from optimizing any
+ // part of the SCC.
+ SCCNodeSet SCCNodes;
+ bool ExternalNode = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+      // External node or function we're trying not to optimize - we both avoid
+      // transforming them and avoid leveraging information they provide.
+ ExternalNode = true;
+ continue;
+ }
+
+ SCCNodes.insert(F);
+ }
+
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!ExternalNode) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes, *TLI);
+ }
+
+ Changed |= addNoRecurseAttrs(SCC, Revisit);
+ return Changed;
+}
+
+bool FunctionAttrs::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ // When iterating over SCCs we visit functions in a bottom-up fashion. Some of
+ // the rules we have for identifying norecurse functions work best with a
+ // top-down walk, so look again at all the functions we previously marked as
+ // worth revisiting, in top-down order.
+ for (auto &F : reverse(Revisit))
+ if (F)
+ Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F));
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
new file mode 100644
index 0000000..d8b677b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -0,0 +1,433 @@
+//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Function import based on summaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-import"
+
+/// Limit on instruction count of imported functions.
+static cl::opt<unsigned> ImportInstrLimit(
+ "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
+ cl::desc("Only import functions with less than N instructions"));
+
+// Lazily load a module from \p FileName in \p Context.
+static std::unique_ptr<Module> loadFile(const std::string &FileName,
+ LLVMContext &Context) {
+ SMDiagnostic Err;
+ DEBUG(dbgs() << "Loading '" << FileName << "'\n");
+ std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+ if (!Result) {
+ Err.print("function-import", errs());
+ return nullptr;
+ }
+
+ Result->materializeMetadata();
+ UpgradeDebugInfo(*Result);
+
+ return Result;
+}
+
+namespace {
+/// Helper to load on demand a Module from file and cache it for subsequent
+/// queries. It can be used with the FunctionImporter.
+class ModuleLazyLoaderCache {
+ /// Cache of lazily loaded module for import.
+ StringMap<std::unique_ptr<Module>> ModuleMap;
+
+ /// Retrieve a Module from the cache or lazily load it on demand.
+ std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+
+public:
+  /// Create the loader; Modules will be initialized in \p Context.
+ ModuleLazyLoaderCache(std::function<
+ std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
+ : createLazyModule(createLazyModule) {}
+
+ /// Retrieve a Module from the cache or lazily load it on demand.
+ Module &operator()(StringRef FileName);
+
+ std::unique_ptr<Module> takeModule(StringRef FileName) {
+ auto I = ModuleMap.find(FileName);
+ assert(I != ModuleMap.end());
+ std::unique_ptr<Module> Ret = std::move(I->second);
+ ModuleMap.erase(I);
+ return Ret;
+ }
+};
+
+// Get a Module for \p Identifier from the cache, or load it lazily.
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
+ auto &Module = ModuleMap[Identifier];
+ if (!Module)
+ Module = createLazyModule(Identifier);
+ return *Module;
+}
+} // anonymous namespace
+
+/// Walk through the instructions in \p F looking for external
+/// calls not already in the \p CalledFunctions set. If any are
+/// found they are added to the \p Worklist for importing.
+static void findExternalCalls(const Module &DestModule, Function &F,
+ const FunctionInfoIndex &Index,
+ StringSet<> &CalledFunctions,
+ SmallVector<StringRef, 64> &Worklist) {
+  // We need to suffix internal function calls imported from other modules;
+  // prepare the suffix ahead of time.
+ std::string Suffix;
+ if (F.getParent() != &DestModule)
+ Suffix =
+ (Twine(".llvm.") +
+ Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
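+  // E.g., an internal function "foo" coming from a module whose ID is 42
+  // would be referenced here as "foo.llvm.42" (the ID value is illustrative).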
+
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (isa<CallInst>(I)) {
+ auto CalledFunction = cast<CallInst>(I).getCalledFunction();
+ // Insert any new external calls that have not already been
+ // added to set/worklist.
+ if (!CalledFunction || !CalledFunction->hasName())
+ continue;
+ // Ignore intrinsics early
+ if (CalledFunction->isIntrinsic()) {
+ assert(CalledFunction->getIntrinsicID() != 0);
+ continue;
+ }
+ auto ImportedName = CalledFunction->getName();
+ auto Renamed = (ImportedName + Suffix).str();
+ // Rename internal functions
+ if (CalledFunction->hasInternalLinkage()) {
+ ImportedName = Renamed;
+ }
+ auto It = CalledFunctions.insert(ImportedName);
+ if (!It.second) {
+ // This is a call to a function we already considered, skip.
+ continue;
+ }
+ // Ignore functions already present in the destination module
+ auto *SrcGV = DestModule.getNamedValue(ImportedName);
+ if (SrcGV) {
+ assert(isa<Function>(SrcGV) && "Name collision during import");
+ if (!cast<Function>(SrcGV)->isDeclaration()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
+ << ImportedName << " already in DestinationModule\n");
+ continue;
+ }
+ }
+
+ Worklist.push_back(It.first->getKey());
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Adding callee for : " << ImportedName << " : "
+ << F.getName() << "\n");
+ }
+ }
+ }
+}
+
+// Helper function: given a worklist and an index, processes the entire
+// worklist and decides what to import based on the summary information.
+//
+// Nothing is actually imported here; functions are materialized in their
+// source module and analyzed there.
+//
+// \p ModuleToFunctionsToImportMap is filled with the set of Function to import
+// per Module.
+static void GetImportList(Module &DestModule,
+ SmallVector<StringRef, 64> &Worklist,
+ StringSet<> &CalledFunctions,
+ std::map<StringRef, DenseSet<const GlobalValue *>>
+ &ModuleToFunctionsToImportMap,
+ const FunctionInfoIndex &Index,
+ ModuleLazyLoaderCache &ModuleLoaderCache) {
+ while (!Worklist.empty()) {
+ auto CalledFunctionName = Worklist.pop_back_val();
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+ << CalledFunctionName << "\n");
+
+ // Try to get a summary for this function call.
+ auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+ if (InfoList == Index.end()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+ << CalledFunctionName << " Ignoring.\n");
+ continue;
+ }
+ assert(!InfoList->second.empty() && "No summary, error at import?");
+
+    // Comdats can have multiple entries. FIXME: what do we do with them?
+ auto &Info = InfoList->second[0];
+ assert(Info && "Nullptr in list, error importing summaries?\n");
+
+ auto *Summary = Info->functionSummary();
+ if (!Summary) {
+ // FIXME: in case we are lazyloading summaries, we can do it now.
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Missing summary for " << CalledFunctionName
+ << ", error at import?\n");
+ llvm_unreachable("Missing summary");
+ }
+
+ if (Summary->instCount() > ImportInstrLimit) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+ << CalledFunctionName << " with " << Summary->instCount()
+ << " instructions (limit " << ImportInstrLimit << ")\n");
+ continue;
+ }
+
+ // Get the module path from the summary.
+ auto ModuleIdentifier = Summary->modulePath();
+ DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+ << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+ auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+ // The function that we will import!
+ GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+ if (!SGV) {
+      // The destination module references functions by their renamed names
+      // when importing a function that was originally local in the source
+      // module. The source module we have might not have been renamed, so we
+      // try to remove the suffix added during the renaming to recover the
+      // original name in the source module.
+ std::pair<StringRef, StringRef> Split =
+ CalledFunctionName.split(".llvm.");
+ SGV = SrcModule.getNamedValue(Split.first);
+ assert(SGV && "Can't find function to import in source module");
+ }
+ if (!SGV) {
+ report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+ "' in Module '" + SrcModule.getModuleIdentifier() +
+ "', error in the summary?\n");
+ }
+
+ Function *F = dyn_cast<Function>(SGV);
+ if (!F && isa<GlobalAlias>(SGV)) {
+ auto *SGA = dyn_cast<GlobalAlias>(SGV);
+ F = dyn_cast<Function>(SGA->getBaseObject());
+ CalledFunctionName = F->getName();
+ }
+ assert(F && "Imported Function is ... not a Function");
+
+ // We cannot import weak_any functions/aliases without possibly affecting
+ // the order they are seen and selected by the linker, changing program
+ // semantics.
+ if (SGV->hasWeakAnyLinkage()) {
+ DEBUG(dbgs() << DestModule.getModuleIdentifier()
+ << ": Ignoring import request for weak-any "
+ << (isa<Function>(SGV) ? "function " : "alias ")
+ << CalledFunctionName << " from "
+ << SrcModule.getModuleIdentifier() << "\n");
+ continue;
+ }
+
+ // Add the function to the import list
+ auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
+ Entry.insert(F);
+
+ // Process the newly imported functions and add callees to the worklist.
+ F->materialize();
+ findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+ }
+}
+
+// Automatically import functions in Module \p DestModule based on the summaries
+// index.
+//
+// The current implementation imports every called function that exists in the
+// summaries index.
+bool FunctionImporter::importFunctions(Module &DestModule) {
+ DEBUG(dbgs() << "Starting import for Module "
+ << DestModule.getModuleIdentifier() << "\n");
+ unsigned ImportedCount = 0;
+
+ /// First step is collecting the called external functions.
+ StringSet<> CalledFunctions;
+ SmallVector<StringRef, 64> Worklist;
+ for (auto &F : DestModule) {
+ if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+ findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
+ }
+ if (Worklist.empty())
+ return false;
+
+ /// Second step: for every call to an external function, try to import it.
+
+ // Linker that will be used for importing function
+ Linker TheLinker(DestModule);
+
+ // Map of Module -> List of Function to import from the Module
+ std::map<StringRef, DenseSet<const GlobalValue *>>
+ ModuleToFunctionsToImportMap;
+
+ // Analyze the summaries and get the list of functions to import by
+ // populating ModuleToFunctionsToImportMap
+ ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+ GetImportList(DestModule, Worklist, CalledFunctions,
+ ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
+ assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
+
+ StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+ ModuleToTempMDValsMap;
+
+ // Do the actual import of functions now, one Module at a time
+ for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+ // Get the module for the import
+ auto &FunctionsToImport = FunctionsToImportPerModule.second;
+ std::unique_ptr<Module> SrcModule =
+ ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+ assert(&DestModule.getContext() == &SrcModule->getContext() &&
+ "Context mismatch");
+
+ // Save the mapping of value ids to temporary metadata created when
+ // importing this function. If we have already imported from this module,
+ // add new temporary metadata to the existing mapping.
+ auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
+ if (!TempMDVals)
+ TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+ // Link in the specified functions.
+ if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
+ &Index, &FunctionsToImport, TempMDVals.get()))
+ report_fatal_error("Function Import: link error");
+
+ ImportedCount += FunctionsToImport.size();
+ }
+
+ // Now link in metadata for all modules from which we imported functions.
+ for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+ ModuleToTempMDValsMap) {
+ // Load the specified source module.
+ auto &SrcModule = ModuleLoaderCache(SME.getKey());
+
+ // Link in all necessary metadata from this module.
+ if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
+ << DestModule.getModuleIdentifier() << "\n");
+  return ImportedCount != 0;
+}
+
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+ SummaryFile("summary-file",
+ cl::desc("The summary file to use for function importing."));
+
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+ raw_ostream &OS = errs();
+ DiagnosticPrinterRawOStream DP(OS);
+ DI.print(DP);
+ OS << '\n';
+}
+
+/// Parse the function index out of an IR file and return the function
+/// index object if found, or nullptr if not.
+static std::unique_ptr<FunctionInfoIndex>
+getFunctionIndexForFile(StringRef Path, std::string &Error,
+ DiagnosticHandlerFunction DiagnosticHandler) {
+ std::unique_ptr<MemoryBuffer> Buffer;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(Path);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ Error = EC.message();
+ return nullptr;
+ }
+ Buffer = std::move(BufferOrErr.get());
+ ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
+ object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
+ DiagnosticHandler);
+ if (std::error_code EC = ObjOrErr.getError()) {
+ Error = EC.message();
+ return nullptr;
+ }
+ return (*ObjOrErr)->takeIndex();
+}
+
+namespace {
+/// Pass that performs cross-module function import provided a summary file.
+class FunctionImportPass : public ModulePass {
+  /// Optional function summary index to use for importing; otherwise
+  /// the summary-file option must be specified.
+ const FunctionInfoIndex *Index;
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ const char *getPassName() const override {
+ return "Function Importing";
+ }
+
+ explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+ : ModulePass(ID), Index(Index) {}
+
+ bool runOnModule(Module &M) override {
+ if (SummaryFile.empty() && !Index)
+ report_fatal_error("error: -function-import requires -summary-file or "
+ "file from frontend\n");
+ std::unique_ptr<FunctionInfoIndex> IndexPtr;
+ if (!SummaryFile.empty()) {
+ if (Index)
+ report_fatal_error("error: -summary-file and index from frontend\n");
+ std::string Error;
+ IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+ if (!IndexPtr) {
+ errs() << "Error loading file '" << SummaryFile << "': " << Error
+ << "\n";
+ return false;
+ }
+ Index = IndexPtr.get();
+ }
+
+ // Perform the import now.
+ auto ModuleLoader = [&M](StringRef Identifier) {
+ return loadFile(Identifier, M.getContext());
+ };
+ FunctionImporter Importer(*Index, ModuleLoader);
+ return Importer.importFunctions(M);
+ }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+ "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+ return new FunctionImportPass(Index);
+}
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 0000000..9b276ed
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,256 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
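+// For example (illustrative IR), in:
+//
+//   define internal void @a() {
+//     call void @b()
+//     ret void
+//   }
+//   define internal void @b() {
+//     call void @a()
+//     ret void
+//   }
+//
+// if nothing live references @a or @b, the mutually recursive pair is
+// unreachable and both functions are deleted, even though each has a use.
+//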
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+#include <unordered_map>
+using namespace llvm;
+
+#define DEBUG_TYPE "globaldce"
+
+STATISTIC(NumAliases , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+ struct GlobalDCE : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+    // run - Do the GlobalDCE pass on the specified module.
+    //
+ bool runOnModule(Module &M) override;
+
+ private:
+ SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+ SmallPtrSet<Constant *, 8> SeenConstants;
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+
+    /// GlobalIsNeeded - Mark the specified global value as needed, and
+    /// recursively mark anything that it uses as also needed.
+ void GlobalIsNeeded(GlobalValue *GV);
+ void MarkUsedGlobalsAsNeeded(Constant *C);
+
+ bool RemoveUnusedGlobalValue(GlobalValue &GV);
+ };
+}
+
+/// Returns true if F contains only a single "ret" instruction.
+static bool isEmptyFunction(Function *F) {
+ BasicBlock &Entry = F->getEntryBlock();
+ if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
+ return false;
+ ReturnInst &RI = cast<ReturnInst>(Entry.front());
+ return RI.getReturnValue() == nullptr;
+}
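+
+// For instance (illustrative), "define internal void @f() { ret void }" is
+// empty by this definition; such functions are dropped from the
+// llvm.global_ctors list in runOnModule below.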
+
+char GlobalDCE::ID = 0;
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false)
+
+ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
+
+bool GlobalDCE::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Remove empty functions from the global ctors list.
+ Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+
+ // Collect the set of members for each comdat.
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
+
+ // Loop over the module, adding globals which are obviously necessary.
+ for (Function &F : M) {
+ Changed |= RemoveUnusedGlobalValue(F);
+ // Functions with external linkage are needed if they have a body
+ if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
+ if (!F.isDiscardableIfUnused())
+ GlobalIsNeeded(&F);
+ }
+
+ for (GlobalVariable &GV : M.globals()) {
+ Changed |= RemoveUnusedGlobalValue(GV);
+ // Externally visible & appending globals are needed, if they have an
+ // initializer.
+ if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
+ if (!GV.isDiscardableIfUnused())
+ GlobalIsNeeded(&GV);
+ }
+
+ for (GlobalAlias &GA : M.aliases()) {
+ Changed |= RemoveUnusedGlobalValue(GA);
+ // Externally visible aliases are needed.
+ if (!GA.isDiscardableIfUnused())
+ GlobalIsNeeded(&GA);
+ }
+
+ // Now that all globals which are needed are in the AliveGlobals set, we loop
+ // through the program, deleting those which are not alive.
+ //
+
+ // The first pass is to drop initializers of global variables which are dead.
+ std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals
+ for (GlobalVariable &GV : M.globals())
+ if (!AliveGlobals.count(&GV)) {
+ DeadGlobalVars.push_back(&GV); // Keep track of dead globals
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ GV.setInitializer(nullptr);
+ if (isSafeToDestroyConstant(Init))
+ Init->destroyConstant();
+ }
+ }
+
+ // The second pass drops the bodies of functions which are dead...
+ std::vector<Function *> DeadFunctions;
+ for (Function &F : M)
+ if (!AliveGlobals.count(&F)) {
+ DeadFunctions.push_back(&F); // Keep track of dead globals
+ if (!F.isDeclaration())
+ F.deleteBody();
+ }
+
+ // The third pass drops targets of aliases which are dead...
+ std::vector<GlobalAlias*> DeadAliases;
+ for (GlobalAlias &GA : M.aliases())
+ if (!AliveGlobals.count(&GA)) {
+ DeadAliases.push_back(&GA);
+ GA.setAliasee(nullptr);
+ }
+
+ if (!DeadFunctions.empty()) {
+ // Now that all interferences have been dropped, delete the actual objects
+ // themselves.
+ for (Function *F : DeadFunctions) {
+ RemoveUnusedGlobalValue(*F);
+ M.getFunctionList().erase(F);
+ }
+ NumFunctions += DeadFunctions.size();
+ Changed = true;
+ }
+
+ if (!DeadGlobalVars.empty()) {
+ for (GlobalVariable *GV : DeadGlobalVars) {
+ RemoveUnusedGlobalValue(*GV);
+ M.getGlobalList().erase(GV);
+ }
+ NumVariables += DeadGlobalVars.size();
+ Changed = true;
+ }
+
+ // Now delete any dead aliases.
+ if (!DeadAliases.empty()) {
+ for (GlobalAlias *GA : DeadAliases) {
+ RemoveUnusedGlobalValue(*GA);
+ M.getAliasList().erase(GA);
+ }
+ NumAliases += DeadAliases.size();
+ Changed = true;
+ }
+
+ // Make sure that all memory is released
+ AliveGlobals.clear();
+ SeenConstants.clear();
+ ComdatMembers.clear();
+
+ return Changed;
+}
+
+/// GlobalIsNeeded - Mark the specified global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+ // If the global is already in the set, no need to reprocess it.
+ if (!AliveGlobals.insert(G).second)
+ return;
+
+ if (Comdat *C = G->getComdat()) {
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ GlobalIsNeeded(CM.second);
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+ // If this is a global variable, we must make sure to add any global values
+ // referenced by the initializer to the alive set.
+ if (GV->hasInitializer())
+ MarkUsedGlobalsAsNeeded(GV->getInitializer());
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+ // The target of a global alias is needed.
+ MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else {
+ // Otherwise this must be a function object. We have to scan the body of
+ // the function looking for constants and global values which are used as
+ // operands. Any operands of these types must be processed to ensure that
+ // any globals used will be marked as needed.
+ Function *F = cast<Function>(G);
+
+ for (Use &U : F->operands())
+ MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
+
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB)
+ for (Use &U : I.operands())
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
+ GlobalIsNeeded(GV);
+ else if (Constant *C = dyn_cast<Constant>(U))
+ MarkUsedGlobalsAsNeeded(C);
+ }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GlobalIsNeeded(GV);
+
+ // Loop over all of the operands of the constant, adding any globals they
+ // use to the list of needed globals.
+ for (Use &U : C->operands()) {
+ // If we've already processed this constant there's no need to do it again.
+ Constant *Op = dyn_cast<Constant>(U);
+ if (Op && SeenConstants.insert(Op).second)
+ MarkUsedGlobalsAsNeeded(Op);
+ }
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+ if (GV.use_empty())
+ return false;
+ GV.removeDeadConstantUsers();
+ return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 0000000..fd77369
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,3234 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken. When that is obviously the case, it marks read/write globals as
+// constant, deletes variables that are only stored to, etc.
+//
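+// For example (illustrative IR), an internal global such as
+//
+//   @g = internal global i32 0
+//
+// that is only ever loaded after static initialization can be marked
+// 'constant', and one that is only ever stored to can be deleted outright.
+//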
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <deque>
+using namespace llvm;
+
+#define DEBUG_TYPE "globalopt"
+
+STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
+STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
+STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
+STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
+STATISTIC(NumDeleted , "Number of globals deleted");
+STATISTIC(NumGlobUses , "Number of global uses devirtualized");
+STATISTIC(NumLocalized , "Number of globals localized");
+STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
+STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
+STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
+STATISTIC(NumNestRemoved , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+
+namespace {
+ struct GlobalOpt : public ModulePass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ private:
+ bool OptimizeFunctions(Module &M);
+ bool OptimizeGlobalVars(Module &M);
+ bool OptimizeGlobalAliases(Module &M);
+ bool deleteIfDead(GlobalValue &GV);
+ bool processGlobal(GlobalValue &GV);
+ bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS);
+ bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+
+ bool isPointerValueDeadOnEntryToFunction(const Function *F,
+ GlobalValue *GV);
+
+ TargetLibraryInfo *TLI;
+ SmallSet<const Comdat *, 8> NotDiscardableComdats;
+ };
+}
+
+char GlobalOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+
+/// Is this global variable possibly used by a leak checker as a root? If so,
+/// we might not really want to eliminate the stores to it.
+static bool isLeakCheckerRoot(GlobalVariable *GV) {
+  // A global variable is a root if it is a pointer, or could plausibly contain
+  // a pointer. There are two challenges; one is that we could have a struct
+  // that has an inner member which is a pointer. We recurse through the type
+  // to detect these (up to a point). The other is that we may actually be a
+  // union of a pointer and another type, so our LLVM type is an integer that
+  // gets converted into a pointer, or an [i8 x #] with a pointer potentially
+  // contained in it.
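+  //
+  // For illustration (hypothetical IR, not from this file), these rules treat
+  //   @p = internal global i8* null                      ; as a root
+  //   @s = internal global { i32, i8* } zeroinitializer  ; as a root
+  // but not
+  //   @f = internal global float 0.0                     ; cannot hold a ptr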
+
+ if (GV->hasPrivateLinkage())
+ return false;
+
+ SmallVector<Type *, 4> Types;
+ Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+
+ unsigned Limit = 20;
+ do {
+ Type *Ty = Types.pop_back_val();
+ switch (Ty->getTypeID()) {
+ default: break;
+ case Type::PointerTyID: return true;
+ case Type::ArrayTyID:
+ case Type::VectorTyID: {
+ SequentialType *STy = cast<SequentialType>(Ty);
+ Types.push_back(STy->getElementType());
+ break;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+ if (STy->isOpaque()) return true;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Type *InnerTy = *I;
+ if (isa<PointerType>(InnerTy)) return true;
+ if (isa<CompositeType>(InnerTy))
+ Types.push_back(InnerTy);
+ }
+ break;
+ }
+ }
+ if (--Limit == 0) return true;
+ } while (!Types.empty());
+ return false;
+}
+
+/// Given a value that is stored to a global but never read, determine whether
+/// it's safe to remove the store and the chain of computation that feeds the
+/// store.
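+///
+/// For example (a hypothetical case): if the only use of
+///   %m = call i8* @malloc(i64 8)
+/// is the dead store, then the store and the malloc can both be deleted. A
+/// loaded value or a function argument is rejected, since it cannot simply
+/// be deleted along with the store.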
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
+ do {
+ if (isa<Constant>(V))
+ return true;
+ if (!V->hasOneUse())
+ return false;
+ if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+ isa<GlobalValue>(V))
+ return false;
+ if (isAllocationFn(V, TLI))
+ return true;
+
+ Instruction *I = cast<Instruction>(V);
+ if (I->mayHaveSideEffects())
+ return false;
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (!GEP->hasAllConstantIndices())
+ return false;
+ } else if (I->getNumOperands() != 1) {
+ return false;
+ }
+
+ V = I->getOperand(0);
+ } while (1);
+}
+
+/// This GV is a pointer root. Loop over all users of the global and clean up
+/// any stores that obviously cannot be keeping dynamically allocated memory
+/// reachable (e.g. stores of constants, or stores whose entire computation
+/// chain can be deleted along with them).
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+ const TargetLibraryInfo *TLI) {
+ // A brief explanation of leak checkers. The goal is to find bugs where
+ // pointers are forgotten, causing an accumulating growth in memory
+ // usage over time. The common strategy for leak checkers is to whitelist the
+ // memory pointed to by globals at exit. This is popular because it also
+ // solves another problem where the main thread of a C++ program may shut down
+ // before other threads that are still expecting to use those globals. To
+ // handle that case, we expect the program may create a singleton and never
+ // destroy it.
+
+ bool Changed = false;
+
+ // If Dead[n].first is the only use of a malloc result, we can delete its
+ // chain of computation and the store to the global in Dead[n].second.
+ SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
+
+ // Constants can't be pointers to dynamically allocated memory.
+ for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
+ UI != E;) {
+ User *U = *UI++;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ Value *V = SI->getValueOperand();
+ if (isa<Constant>(V)) {
+ Changed = true;
+ SI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, SI));
+ }
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
+ if (isa<Constant>(MSI->getValue())) {
+ Changed = true;
+ MSI->eraseFromParent();
+ } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, MSI));
+ }
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
+ GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
+ if (MemSrc && MemSrc->isConstant()) {
+ Changed = true;
+ MTI->eraseFromParent();
+    } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
+ if (I->hasOneUse())
+ Dead.push_back(std::make_pair(I, MTI));
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ if (isSafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ // This could have invalidated UI, start over from scratch.
+ Dead.clear();
+ CleanupPointerRootUsers(GV, TLI);
+ return true;
+ }
+ }
+ }
+
+ for (int i = 0, e = Dead.size(); i != e; ++i) {
+ if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
+ Dead[i].second->eraseFromParent();
+ Instruction *I = Dead[i].first;
+ do {
+ if (isAllocationFn(I, TLI))
+ break;
+ Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
+ if (!J)
+ break;
+ I->eraseFromParent();
+ I = J;
+ } while (1);
+ I->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+/// We just marked GV constant. Loop over all users of the global, cleaning up
+/// the obvious ones. This is largely just a quick scan over the use list to
+/// clean up the easy and obvious cruft. This returns true if it made a change.
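+///
+/// For instance (hypothetical IR): once @g is known to be constant with
+/// initializer i32 7, a 'load i32, i32* @g' is replaced by i32 7, and any
+/// store to @g must be unreachable and can simply be deleted.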
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+ bool Changed = false;
+ // Note that we need to use a weak value handle for the worklist items. When
+ // we delete a constant array, we may also be holding pointer to one of its
+ // elements (or an element of one of its elements if we're dealing with an
+ // array of arrays) in the worklist.
+ SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
+ while (!WorkList.empty()) {
+ Value *UV = WorkList.pop_back_val();
+ if (!UV)
+ continue;
+
+ User *U = cast<User>(UV);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (Init) {
+ // Replace the load with the initializer.
+ LI->replaceAllUsesWith(Init);
+ LI->eraseFromParent();
+ Changed = true;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Store must be unreachable or storing Init into the global.
+ SI->eraseFromParent();
+ Changed = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Constant *SubInit = nullptr;
+ if (Init)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
+ } else if ((CE->getOpcode() == Instruction::BitCast &&
+ CE->getType()->isPointerTy()) ||
+ CE->getOpcode() == Instruction::AddrSpaceCast) {
+ // Pointer cast, delete any stores and memsets to the global.
+ Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
+ }
+
+ if (CE->use_empty()) {
+ CE->destroyConstant();
+ Changed = true;
+ }
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // Do not transform "gepinst (gep constexpr (GV))" here, because forming
+ // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
+ // and will invalidate our notion of what Init is.
+ Constant *SubInit = nullptr;
+ if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
+ ConstantFoldInstruction(GEP, DL, TLI));
+ if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
+ SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+
+ // If the initializer is an all-null value and we have an inbounds GEP,
+ // we already know what the result of any load from that GEP is.
+ // TODO: Handle splats.
+ if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
+ SubInit = Constant::getNullValue(GEP->getType()->getElementType());
+ }
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
+
+ if (GEP->use_empty()) {
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
+ if (MI->getRawDest() == V) {
+ MI->eraseFromParent();
+ Changed = true;
+ }
+
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ // If we have a chain of dead constantexprs or other things dangling from
+ // us, and if they are all dead, nuke them without remorse.
+ if (isSafeToDestroyConstant(C)) {
+ C->destroyConstant();
+ CleanupConstantGlobalUsers(V, Init, DL, TLI);
+ return true;
+ }
+ }
+ }
+ return Changed;
+}
+
+/// Return true if the specified instruction is a safe user of a derived
+/// expression from a global that we want to SROA.
+static bool isSafeSROAElementUse(Value *V) {
+ // We might have a dead and dangling constant hanging off of here.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return isSafeToDestroyConstant(C);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Loads are ok.
+ if (isa<LoadInst>(I)) return true;
+
+ // Stores *to* the pointer are ok.
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getOperand(0) != V;
+
+ // Otherwise, it must be a GEP.
+ GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
+ if (!GEPI) return false;
+
+ if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
+ !cast<Constant>(GEPI->getOperand(1))->isNullValue())
+ return false;
+
+ for (User *U : GEPI->users())
+ if (!isSafeSROAElementUse(U))
+ return false;
+ return true;
+}
+
+
+/// U is a direct user of the specified global value. Look at it and its uses
+/// and decide whether it is safe to SROA this global.
+static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
+ // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
+ cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
+ return false;
+
+ // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
+ // don't like < 3 operand CE's, and we don't like non-constant integer
+ // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
+ // value of C.
+ if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+ !cast<Constant>(U->getOperand(1))->isNullValue() ||
+ !isa<ConstantInt>(U->getOperand(2)))
+ return false;
+
+ gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+ ++GEPI; // Skip over the pointer index.
+
+ // If this is a use of an array allocation, do a bit more checking for sanity.
+ if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+ uint64_t NumElements = AT->getNumElements();
+ ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
+
+ // Check to make sure that index falls within the array. If not,
+ // something funny is going on, so we won't do the optimization.
+ //
+ if (Idx->getZExtValue() >= NumElements)
+ return false;
+
+ // We cannot scalar repl this level of the array unless any array
+ // sub-indices are in-range constants. In particular, consider:
+ // A[0][i]. We cannot know that the user isn't doing invalid things like
+ // allowing i to index an out-of-range subscript that accesses A[1].
+ //
+ // Scalar replacing *just* the outer index of the array is probably not
+ // going to be a win anyway, so just give up.
+ for (++GEPI; // Skip array index.
+ GEPI != E;
+ ++GEPI) {
+ uint64_t NumElements;
+ if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+ NumElements = SubArrayTy->getNumElements();
+ else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+ NumElements = SubVectorTy->getNumElements();
+ else {
+ assert((*GEPI)->isStructTy() &&
+ "Indexed GEP type is not array, vector, or struct!");
+ continue;
+ }
+
+ ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+ if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ return false;
+ }
+ }
+
+ for (User *UU : U->users())
+ if (!isSafeSROAElementUse(UU))
+ return false;
+
+ return true;
+}
+
+/// Look at all uses of the global and decide whether it is safe for us to
+/// perform this transformation.
+static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
+ for (User *U : GV->users())
+ if (!IsUserOfGlobalSafeForSRA(U, GV))
+ return false;
+
+ return true;
+}
+
+
+/// Perform scalar replacement of aggregates on the specified global variable.
+/// This opens the door for other optimizations by exposing the behavior of the
+/// program in a more fine-grained way. We have determined that this
+/// transformation is safe already. We return the first global variable we
+/// insert so that the caller can reprocess it.
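+///
+/// A sketch (hypothetical IR): a global
+///   @g = internal global { i32, i32 } zeroinitializer
+/// whose uses are all of the form 'gep @g, 0, C' is split into
+///   @g.0 = internal global i32 0
+///   @g.1 = internal global i32 0
+/// and each GEP is rewritten to reference the matching @g.N directly.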
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
+ // Make sure this global only has simple uses that we can SRA.
+ if (!GlobalUsersSafeToSRA(GV))
+ return nullptr;
+
+ assert(GV->hasLocalLinkage() && !GV->isConstant());
+ Constant *Init = GV->getInitializer();
+ Type *Ty = Init->getType();
+
+ std::vector<GlobalVariable*> NewGlobals;
+ Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = DL.getABITypeAlignment(GV->getType());
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *DL.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Constant *In = Init->getAggregateElement(i);
+ assert(In && "Couldn't get element of initializer?");
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
+ }
+ } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ unsigned NumElements = 0;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(STy)->getNumElements();
+
+ if (NumElements > 16 && GV->hasNUsesOrMore(16))
+ return nullptr; // It's not worth it.
+ NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType());
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Constant *In = Init->getAggregateElement(i);
+ assert(In && "Couldn't get element of initializer?");
+
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
+ GlobalVariable::InternalLinkage,
+ In, GV->getName()+"."+Twine(i),
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ Globals.push_back(NGV);
+ NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
+ }
+ }
+
+ if (NewGlobals.empty())
+ return nullptr;
+
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
+
+  Constant *NullInt =
+      Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
+
+ // Loop over all of the uses of the global, replacing the constantexpr geps,
+ // with smaller constantexpr geps or direct references.
+ while (!GV->use_empty()) {
+ User *GEP = GV->user_back();
+ assert(((isa<ConstantExpr>(GEP) &&
+ cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
+ isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
+
+    // Ignore operand 1, which has to be zero or else the program is quite
+    // broken (undefined). Get operand 2, which is the structure or array
+    // index.
+ unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
+
+ Value *NewPtr = NewGlobals[Val];
+ Type *NewTy = NewGlobals[Val]->getValueType();
+
+ // Form a shorter GEP if needed.
+ if (GEP->getNumOperands() > 3) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
+ Idxs.push_back(CE->getOperand(i));
+ NewPtr =
+ ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
+ } else {
+ GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
+ SmallVector<Value*, 8> Idxs;
+ Idxs.push_back(NullInt);
+ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
+ Idxs.push_back(GEPI->getOperand(i));
+ NewPtr = GetElementPtrInst::Create(
+ NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
+ }
+ }
+ GEP->replaceAllUsesWith(NewPtr);
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
+ GEPI->eraseFromParent();
+ else
+ cast<ConstantExpr>(GEP)->destroyConstant();
+ }
+
+ // Delete the old global, now that it is dead.
+ Globals.erase(GV);
+ ++NumSRA;
+
+ // Loop over the new globals array deleting any globals that are obviously
+ // dead. This can arise due to scalarization of a structure or an array that
+ // has elements that are dead.
+ unsigned FirstGlobal = 0;
+ for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
+ if (NewGlobals[i]->use_empty()) {
+ Globals.erase(NewGlobals[i]);
+ if (FirstGlobal == i) ++FirstGlobal;
+ }
+
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
+}
+
+/// Return true if all users of the specified value will trap if the value is
+/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
+/// reprocessing them.
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
+ for (const User *U : V->users())
+ if (isa<LoadInst>(U)) {
+ // Will trap.
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Storing the value.
+ }
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+ if (CI->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ if (II->getCalledValue() != V) {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false; // Not calling the ptr
+ }
+ } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // If we've already seen this phi node, ignore it, it has already been
+ // checked.
+ if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ return false;
+ } else if (isa<ICmpInst>(U) &&
+ isa<ConstantPointerNull>(U->getOperand(1))) {
+ // Ignore icmp X, null
+ } else {
+ //cerr << "NONTRAPPING USE: " << *U;
+ return false;
+ }
+
+ return true;
+}
+
+/// Return true if all uses of any loads from GV will trap if the loaded value
+/// is null. Note that this also permits comparisons of the loaded value
+/// against null, as a special case.
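+///
+/// For instance (hypothetical IR), given %p = load i32*, i32** @g:
+///   %v = load i32, i32* %p     ; would trap on null %p -> acceptable
+///   store i32* %p, i32** @h    ; would not trap        -> rejected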
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ for (const User *U : GV->users())
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (isa<StoreInst>(U)) {
+ // Ignore stores to the global.
+ } else {
+ // We don't know or understand this user, bail out.
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+ return false;
+ }
+ return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+ bool Changed = false;
+ for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
+ Instruction *I = cast<Instruction>(*UI++);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ LI->setOperand(0, NewV);
+ Changed = true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getOperand(1) == V) {
+ SI->setOperand(1, NewV);
+ Changed = true;
+ }
+ } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ CallSite CS(I);
+ if (CS.getCalledValue() == V) {
+ // Calling through the pointer! Turn into a direct call, but be careful
+ // that the pointer is not also being passed as an argument.
+ CS.setCalledFunction(NewV);
+ Changed = true;
+ bool PassedAsArg = false;
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == V) {
+ PassedAsArg = true;
+ CS.setArgument(i, NewV);
+ }
+
+ if (PassedAsArg) {
+ // Being passed as an argument also. Be careful to not invalidate UI!
+ UI = V->user_begin();
+ }
+ }
+ } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+ ConstantExpr::getCast(CI->getOpcode(),
+ NewV, CI->getType()));
+ if (CI->use_empty()) {
+ Changed = true;
+ CI->eraseFromParent();
+ }
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ // Should handle GEP here.
+ SmallVector<Constant*, 8> Idxs;
+ Idxs.reserve(GEPI->getNumOperands()-1);
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
+ Idxs.push_back(C);
+ else
+ break;
+ if (Idxs.size() == GEPI->getNumOperands()-1)
+ Changed |= OptimizeAwayTrappingUsesOfValue(
+ GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
+ if (GEPI->use_empty()) {
+ Changed = true;
+ GEPI->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+
+/// The specified global has only one non-null value stored into it. If there
+/// are uses of the loaded value that would trap if the loaded value is
+/// dynamically null, then we know that they cannot be reachable with a null
+/// value, so we can optimize away the load.
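+///
+/// E.g. (hypothetical IR): if @fp is initialized to null and the only store
+/// into it is 'store void ()* @impl, void ()** @fp', a call through a pointer
+/// loaded from @fp would trap if the load produced null, so each such call
+/// site can be turned into a direct call to @impl.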
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+ bool Changed = false;
+
+ // Keep track of whether we are able to remove all the uses of the global
+ // other than the store that defines it.
+ bool AllNonStoreUsesGone = true;
+
+  // Replace all uses of loads with uses of the stored value.
+  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end();
+       GUI != E;) {
+ User *GlobalUser = *GUI++;
+ if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+ // If we were able to delete all uses of the loads
+ if (LI->use_empty()) {
+ LI->eraseFromParent();
+ Changed = true;
+ } else {
+ AllNonStoreUsesGone = false;
+ }
+ } else if (isa<StoreInst>(GlobalUser)) {
+ // Ignore the store that stores "LV" to the global.
+ assert(GlobalUser->getOperand(1) == GV &&
+ "Must be storing *to* the global");
+ } else {
+ AllNonStoreUsesGone = false;
+
+ // If we get here we could have other crazy uses that are transitively
+ // loaded.
+      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+              isa<BitCastInst>(GlobalUser) ||
+              isa<GetElementPtrInst>(GlobalUser)) &&
+             "Only expect loads and stores!");
+ }
+ }
+
+ if (Changed) {
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n");
+ ++NumGlobUses;
+ }
+
+ // If we nuked all of the loads, then none of the stores are needed either,
+ // nor is the global.
+ if (AllNonStoreUsesGone) {
+ if (isLeakCheckerRoot(GV)) {
+ Changed |= CleanupPointerRootUsers(GV, TLI);
+ } else {
+ Changed = true;
+ CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
+ }
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ Changed = true;
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ }
+ return Changed;
+}
+
+/// Walk the use list of V, constant folding all of the instructions that are
+/// foldable.
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+ for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
+ if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
+ I->replaceAllUsesWith(NewC);
+
+ // Advance UI to the next non-I use to avoid invalidating it!
+ // Instructions could multiply use V.
+ while (UI != E && *UI == I)
+ ++UI;
+ I->eraseFromParent();
+ }
+}
+
+/// This function takes the specified global variable, and transforms the
+/// program as if it always contained the result of the specified malloc.
+/// Because it is always the result of the specified malloc, there is no reason
+/// to actually DO the malloc. Instead, turn the malloc into a global, and
+/// rewrite any loads of GV as uses of the new global.
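+///
+/// Roughly (hypothetical IR): for a store of the result of 'malloc(4)' into
+/// i32** @g, we create
+///   @g.body = internal global i32 undef
+/// and rewrite loads of @g to use @g.body, adding an i1 @g.init flag only if
+/// the loaded pointer is also compared against null somewhere.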
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+ ConstantInt *NElements, const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+
+ Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is of an array.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+  // Create the new global variable. The contents of the malloc'd memory are
+  // undefined, so initialize with an undef value.
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
+ GV->getThreadLocalMode());
+
+ // If there are bitcast users of the malloc (which is typical, usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = nullptr;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->user_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (!TheBC)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
+ Constant *RepValue = NewGV;
+ if (NewGV->getType() != GV->getType()->getElementType())
+ RepValue = ConstantExpr::getBitCast(RepValue,
+ GV->getType()->getElementType());
+
+ // If there is a comparison against null, we will insert a global bool to
+ // keep track of whether the global was initialized yet or not.
+ GlobalVariable *InitBool =
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->getThreadLocalMode());
+ bool InitBoolUsed = false;
+
+ // Loop over all uses of GV, processing them in turn.
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
+ // The global is initialized when the store to it occurs.
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
+ SI->eraseFromParent();
+ continue;
+ }
+
+ LoadInst *LI = cast<LoadInst>(GV->user_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = *LI->use_begin();
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
+ if (!ICI) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ // Replace the cmp X, 0 with a use of the bool value.
+ // Sink the load to where the compare was, if atomic rules allow us to.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+ LI->getOrdering(), LI->getSynchScope(),
+ LI->isUnordered() ? (Instruction*)ICI : LI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
+
+ // If the initialization boolean was used, insert it, otherwise delete it.
+ if (!InitBoolUsed) {
+ while (!InitBool->use_empty()) // Delete initializations
+ cast<StoreInst>(InitBool->user_back())->eraseFromParent();
+ delete InitBool;
+ } else
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
+
+  // Now the GV is dead, nuke it and the malloc.
+ GV->eraseFromParent();
+ CI->eraseFromParent();
+
+ // To further other optimizations, loop over all users of NewGV and try to
+ // constant prop them. This will promote GEP instructions with constant
+ // indices into GEP constant-exprs, which will allow global-opt to hack on it.
+ ConstantPropUsersOf(NewGV, DL, TLI);
+ if (RepValue != NewGV)
+ ConstantPropUsersOf(RepValue, DL, TLI);
+
+ return NewGV;
+}
+
+/// Scan the use-list of V checking to make sure that there are no complex uses
+/// of V. We permit simple things like dereferencing the pointer, but not
+/// storing through the address, unless it is to the specified global.
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+ const GlobalVariable *GV,
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
+ for (const User *U : V->users()) {
+ const Instruction *Inst = cast<Instruction>(U);
+
+ if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+ continue; // Fine, ignore.
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
+ return false; // Storing the pointer itself... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
+
+ // Must index into the array and into the struct.
+ if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
+ // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
+ // cycles.
+ if (PHIs.insert(PN).second)
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ return false;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
+/// allocation into loads from the global and uses of the resultant pointer.
+/// Further, delete the store into GV. This assumes that the value passes the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
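+///
+/// E.g. (hypothetical IR): given 'store i32* %m, i32** @g', every other use
+/// of %m is rewritten to use a freshly inserted 'load i32*, i32** @g', and
+/// the initializing store itself is deleted.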
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+ GlobalVariable *GV) {
+ while (!Alloc->use_empty()) {
+ Instruction *U = cast<Instruction>(*Alloc->user_begin());
+ Instruction *InsertPt = U;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // If this is the store of the allocation into the global, remove it.
+ if (SI->getOperand(1) == GV) {
+ SI->eraseFromParent();
+ continue;
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Insert the load in the corresponding predecessor, not right before the
+ // PHI.
+ InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
+ } else if (isa<BitCastInst>(U)) {
+ // Must be bitcast between the malloc and store to initialize the global.
+ ReplaceUsesOfMallocWithGlobal(U, GV);
+ U->eraseFromParent();
+ continue;
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ // If this is a "GEP bitcast" and the user is a store to the global, then
+ // just process it as a bitcast.
+ if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
+ if (SI->getOperand(1) == GV) {
+ // Must be bitcast GEP between the malloc and store to initialize
+ // the global.
+ ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+ GEPI->eraseFromParent();
+ continue;
+ }
+ }
+
+ // Insert a load from the global, and use it instead of the malloc.
+ Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ U->replaceUsesOfWith(Alloc, NL);
+ }
+}
+
+/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
+/// perform heap SRA on. This permits GEP's that index through the array and
+/// struct field, icmps of null, and PHIs.
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+ SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
+ SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
+ // We permit two users of the load: setcc comparing against the null
+ // pointer, and a getelementptr of a specific form.
+ for (const User *U : V->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+
+ // Comparison against null is ok.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return false;
+ continue;
+ }
+
+ // getelementptr is also ok, but only a simple form.
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
+ // Must index into the array and into the struct.
+ if (GEPI->getNumOperands() < 3)
+ return false;
+
+ // Otherwise the GEP is ok.
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
+ if (!LoadUsingPHIsPerLoad.insert(PN).second)
+ // This means some phi nodes are dependent on each other.
+ // Avoid infinite looping!
+ return false;
+ if (!LoadUsingPHIs.insert(PN).second)
+ // If we have already analyzed this PHI, then it is safe.
+ continue;
+
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+ LoadUsingPHIs, LoadUsingPHIsPerLoad))
+ return false;
+
+ continue;
+ }
+
+ // Otherwise we don't know what this is, not ok.
+ return false;
+ }
+
+ return true;
+}
+
+
+/// If all users of values loaded from GV are simple enough to perform HeapSRA,
+/// return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+ Instruction *StoredVal) {
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (const User *U : GV->users())
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+ LoadUsingPHIsPerLoad))
+ return false;
+ LoadUsingPHIsPerLoad.clear();
+ }
+
+  // If we reach here, we know that all uses of the loads and transitive uses
+  // (through PHI nodes) are simple enough to transform. However, we don't know
+  // that all inputs to the PHI nodes are in the same equivalence sets.
+  // Check to verify that all operands of the PHIs are either PHIs that can be
+  // transformed, loads from GV, or the stored value itself.
+ for (const PHINode *PN : LoadUsingPHIs) {
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == StoredVal) continue;
+
+ if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
+ return false;
+ }
+
+ // Load from GV is ok.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // UNDEF? NULL?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f"+Twine(FieldNo), LI);
+ } else {
+ PHINode *PN = cast<PHINode>(V);
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+
+ PointerType *PTy = cast<PointerType>(PN->getType());
+ StructType *ST = cast<StructType>(PTy->getElementType());
+
+ unsigned AS = PTy->getAddressSpace();
+ PHINode *NewPN =
+ PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
+ PN->getNumIncomingValues(),
+ PN->getName()+".f"+Twine(FieldNo), PN);
+ Result = NewPN;
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ }
+
+ return FieldVals[FieldNo] = Result;
+}
+
+/// Given a load instruction and a value derived from the load, rewrite the
+/// derived value to use the HeapSRoA'd load.
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ // If this is a comparison against null, handle it.
+ if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
+ assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+ // If we have a setcc of the loaded pointer, we can use a setcc of any
+ // field.
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
+ Constant::getNullValue(NPtr->getType()),
+ SCI->getName());
+ SCI->replaceAllUsesWith(New);
+ SCI->eraseFromParent();
+ return;
+ }
+
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
+ assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
+ && "Unexpected GEPI!");
+
+ // Load the pointer for this field.
+ unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+
+ // Create the new GEP idx vector.
+ SmallVector<Value*, 8> GEPIdx;
+ GEPIdx.push_back(GEPI->getOperand(1));
+ GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
+
+ Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NGEPI);
+ GEPI->eraseFromParent();
+ return;
+ }
+
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ if (!InsertedScalarizedValues.insert(std::make_pair(PN,
+ std::vector<Value*>())).second)
+ return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+}
+
+/// We are performing Heap SRoA on a global. Load is a value loaded from the
+/// global. Eliminate all uses of Load, making them use FieldGlobals instead.
+/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+ }
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
+}
+
+/// CI is an allocation of an array of structures. Break it up into multiple
+/// allocations of arrays of the fields.
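+///
+/// E.g. (hypothetical): a malloc of '%n x { i32, i8* }' stored into @g is
+/// split into two mallocs feeding new globals @g.f0 and @g.f1, one holding an
+/// array of i32 and the other an array of i8*, with loads and GEPs rewritten
+/// to match.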
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value *NElems, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ Type *MAT = getMallocAllocatedType(CI, TLI);
+ StructType *STy = cast<StructType>(MAT);
+
+ // There is guaranteed to be at least one use of the malloc (storing
+ // it into GV). If there are other uses, change them to be uses of
+ // the global to simplify later code. This also deletes the store
+ // into GV.
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
+ // Okay, at this point, there are no users of the malloc. Insert N
+ // new mallocs at the same place as CI, and N globals.
+ std::vector<Value*> FieldGlobals;
+ std::vector<Value*> FieldMallocs;
+
+ unsigned AS = GV->getType()->getPointerAddressSpace();
+ for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+ Type *FieldTy = STy->getElementType(FieldNo);
+ PointerType *PFieldTy = PointerType::get(FieldTy, AS);
+
+ GlobalVariable *NGV = new GlobalVariable(
+ *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
+ nullptr, GV->getThreadLocalMode());
+ FieldGlobals.push_back(NGV);
+
+ unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
+ if (StructType *ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems, nullptr,
+ CI->getName() + ".f" + Twine(FieldNo));
+ FieldMallocs.push_back(NMI);
+ new StoreInst(NMI, NGV, CI);
+ }
+
+ // The tricky aspect of this transformation is handling the case when malloc
+ // fails. In the original code, malloc failing would set the result pointer
+ // of malloc to null. In this case, some mallocs could succeed and others
+ // could fail. As such, we emit code that looks like this:
+ // F0 = malloc(field0)
+ // F1 = malloc(field1)
+ // F2 = malloc(field2)
+ // if (F0 == 0 || F1 == 0 || F2 == 0) {
+ // if (F0) { free(F0); F0 = 0; }
+ // if (F1) { free(F1); F1 = 0; }
+ // if (F2) { free(F2); F2 = 0; }
+ // }
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
+ ConstantZero, "isneg");
+ for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ }
+
+ // Split the basic block at the old malloc.
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB =
+ OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
+
+ // Create the block to check the first condition. Put all these blocks at the
+ // end of the function as they are unlikely to be executed.
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
+ OrigBB->getParent());
+
+ // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+ // branch on RunningOr.
+ OrigBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
+
+ // Within the NullPtrBlock, we need to emit a comparison and branch for each
+ // pointer, because some may be null while others are not.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Constant::getNullValue(GVVal->getType()));
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
+ OrigBB->getParent());
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
+ OrigBB->getParent());
+ Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
+ Cmp, NullPtrBlock);
+
+ // Fill in FreeBlock.
+ CallInst::CreateFree(GVVal, BI);
+ new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+ FreeBlock);
+ BranchInst::Create(NextBlock, FreeBlock);
+
+ NullPtrBlock = NextBlock;
+ }
+
+ BranchInst::Create(ContBB, NullPtrBlock);
+
+ // CI is no longer needed, remove it.
+ CI->eraseFromParent();
+
+ /// As we process loads, if we can't immediately update all uses of the load,
+ /// keep track of what scalarized loads are inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+
+ // Okay, the malloc site is completely handled. All of the uses of GV are now
+ // loads, and all uses of those loads are simple. Rewrite them to use loads
+ // of the per-field globals instead.
+ for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&
+           "Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+ }
+ }
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
+ // The old global is now dead, remove it.
+ GV->eraseFromParent();
+
+ ++NumHeapSRA;
+ return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// This function is called when we see a pointer global variable with a single
+/// value stored into it that is the result of a malloc (or a cast of one).
+static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
+ Type *AllocTy,
+ AtomicOrdering Ordering,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+ // If this is a malloc of an abstract type, don't touch it.
+ if (!AllocTy->isSized())
+ return false;
+
+ // We can't optimize this global unless all uses of it are *known* to be
+ // of the malloc value, not of the null initializer value (consider a use
+ // that compares the global's value against zero to see if the malloc has
+ // been reached). To do this, we check to see if all uses of the global
+ // would trap if the global were null: this proves that they must all
+ // happen after the malloc.
+ if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals. Here we only allow the
+  // malloc to be stored into the specified global, loaded, icmp'd, and
+  // GEP'd; these are all uses we can rewrite to go through the global
+  // instead.
+ SmallPtrSet<const PHINode*, 8> PHIs;
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+ return false;
+
+ // If we have a global that is only initialized with a fixed size malloc,
+ // transform the program to use global memory instead of malloc'd memory.
+ // This eliminates dynamic allocation, avoids an indirection accessing the
+ // data, and exposes the resultant global to further GlobalOpt.
+ // We cannot optimize the malloc if we cannot determine malloc array size.
+ Value *NElems = getMallocArraySize(CI, DL, TLI, true);
+ if (!NElems)
+ return false;
+
+ if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+ // Restrict this transformation to only working on small allocations
+ // (2048 bytes currently), as we don't want to introduce a 16M global or
+ // something.
+ if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
+ OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
+ return true;
+ }
+
+ // If the allocation is an array of structures, consider transforming this
+ // into multiple malloc'd arrays, one for each field. This is basically
+ // SRoA for malloc'd memory.
+
+ if (Ordering != NotAtomic)
+ return false;
+
+ // If this is an allocation of a fixed size array of structs, analyze as a
+ // variable size array. malloc [100 x struct],1 -> malloc struct, 100
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+ if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+ AllocTy = AT->getElementType();
+
+ StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+ if (!AllocSTy)
+ return false;
+
+  // If the structure has an unreasonable number of fields, leave it
+  // alone.
+ if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+    // If this is a fixed size array, transform the malloc to be a malloc of
+    // structs. malloc [100 x struct],1 -> malloc struct, 100
+ if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+ unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ nullptr, CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
+ CI->eraseFromParent();
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
+ CI = cast<CallInst>(BCI->getOperand(0));
+ else
+ CI = cast<CallInst>(Malloc);
+ }
+
+ PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
+ TLI);
+ return true;
+ }
+
+ return false;
+}
+
+// Try to optimize globals based on the knowledge that only one value (besides
+// its initializer) is ever stored to the global.
+static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ AtomicOrdering Ordering,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+ // Ignore no-op GEPs and bitcasts.
+ StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+ // If we are dealing with a pointer global that is initialized to null and
+ // only has one (non-null) value stored into it, then we can optimize any
+ // users of the loaded value (often calls and loads) that would trap if the
+ // value was null.
+ if (GV->getInitializer()->getType()->isPointerTy() &&
+ GV->getInitializer()->isNullValue()) {
+ if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+ if (GV->getInitializer()->getType() != SOVC->getType())
+ SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+ // Optimize away any trapping uses of the loaded value.
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
+ return true;
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+ Type *MallocType = getMallocAllocatedType(CI, TLI);
+ if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ Ordering, DL, TLI))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// At this point, we have learned that the only two values ever stored into GV
+/// are its initializer and OtherVal. See if we can shrink the global into a
+/// boolean and select between the two values whenever it is used. This exposes
+/// the values to other scalar optimizations.
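+///
+/// Sketch (hypothetical IR): if @g is initialized to i32 0 and only i32 42 is
+/// ever stored to it, @g is replaced by an i1 global (which takes over the
+/// name @g), stores become i1 stores, and each load becomes
+///   %b = load i1, i1* @g
+///   %v = select i1 %b, i32 42, i32 0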
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
+ Type *GVElType = GV->getType()->getElementType();
+
+ // If GVElType is already i1, it is already shrunk. If the type of the GV is
+ // an FP value, pointer or vector, don't do this optimization because a select
+ // between them is very expensive and unlikely to lead to later
+ // simplification. In these cases, we typically end up with "cond ? v1 : v2"
+ // where v1 and v2 both require constant pool loads, a big loss.
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPointTy() ||
+ GVElType->isPointerTy() || GVElType->isVectorTy())
+ return false;
+
+ // Walk the use list of the global seeing if all the uses are load or store.
+ // If there is anything else, bail out.
+ for (User *U : GV->users())
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
+ return false;
+
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
+
+ // Create the new global, initializing it to false.
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".b",
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
+ GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
+
+ Constant *InitVal = GV->getInitializer();
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
+ "No reason to shrink to bool!");
+
+ // If initialized to zero and storing one into the global, we can use a cast
+ // instead of a select to synthesize the desired value.
+ bool IsOneZero = false;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ IsOneZero = InitVal->isNullValue() && CI->isOne();
+
+ while (!GV->use_empty()) {
+ Instruction *UI = cast<Instruction>(GV->user_back());
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Change the store into a boolean store.
+ bool StoringOther = SI->getOperand(0) == OtherVal;
+ // Only do this if we weren't storing a loaded value.
+ Value *StoreVal;
+ if (StoringOther || SI->getOperand(0) == InitVal) {
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
+ } else {
+ // Otherwise, we are storing a previously loaded copy. To do this,
+ // change the copy from copying the original value to just copying the
+ // bool.
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
+
+ // If we've already replaced the input, StoredVal will be a cast or
+ // select instruction. If not, it will be a load of the original
+ // global.
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ assert(LI->getOperand(0) == GV && "Not a copy!");
+ // Insert a new load, to preserve the saved value.
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
+ } else {
+ assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
+ "This is not a form that we understand!");
+ StoreVal = StoredVal->getOperand(0);
+ assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
+ }
+ }
+ new StoreInst(StoreVal, NewGV, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
+ } else {
+ // Change the load into a load of bool then a select.
+ LoadInst *LI = cast<LoadInst>(UI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
+ Value *NSI;
+ if (IsOneZero)
+ NSI = new ZExtInst(NLI, LI->getType(), "", LI);
+ else
+ NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
+ NSI->takeName(LI);
+ LI->replaceAllUsesWith(NSI);
+ }
+ UI->eraseFromParent();
+ }
+
+ // Retain the name of the old global variable. People who are debugging their
+ // programs may expect these variables to be named the same.
+ NewGV->takeName(GV);
+ GV->eraseFromParent();
+ return true;
+}
+
+bool GlobalOpt::deleteIfDead(GlobalValue &GV) {
+ GV.removeDeadConstantUsers();
+
+ if (!GV.isDiscardableIfUnused())
+ return false;
+
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
+ return false;
+
+ bool Dead;
+ if (auto *F = dyn_cast<Function>(&GV))
+ Dead = F->isDefTriviallyDead();
+ else
+ Dead = GV.use_empty();
+ if (!Dead)
+ return false;
+
+ DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ GV.eraseFromParent();
+ ++NumDeleted;
+ return true;
+}
+
+/// Analyze the specified global value and optimize it if possible. If we
+/// make a change, return true.
+bool GlobalOpt::processGlobal(GlobalValue &GV) {
+ // Do more involved optimizations if the global is internal.
+ if (!GV.hasLocalLinkage())
+ return false;
+
+ GlobalStatus GS;
+
+ if (GlobalStatus::analyzeGlobal(&GV, GS))
+ return false;
+
+ bool Changed = false;
+ if (!GS.IsCompared && !GV.hasUnnamedAddr()) {
+ GV.setUnnamedAddr(true);
+ NumUnnamed++;
+ Changed = true;
+ }
+
+ auto *GVar = dyn_cast<GlobalVariable>(&GV);
+ if (!GVar)
+ return Changed;
+
+ if (GVar->isConstant() || !GVar->hasInitializer())
+ return Changed;
+
+ return processInternalGlobal(GVar, GS) || Changed;
+}
+
+bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F,
+                                                    GlobalValue *GV) {
+ // Find all uses of GV. We expect them all to be in F, and if we can't
+ // identify any of the uses we bail out.
+ //
+ // On each of these uses, identify if the memory that GV points to is
+ // used/required/live at the start of the function. If it is not, for example
+ // if the first thing the function does is store to the GV, the GV can
+ // possibly be demoted.
+ //
+ // We don't do an exhaustive search for memory operations - simply look
+ // through bitcasts as they're quite common and benign.
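+  // For example (illustrative): in
+  //   define void @f() { store i32 0, i32* @g ; ...loads of @g... }
+  // the memory of @g is dead on entry, because every load of @g is
+  // dominated by a store inside @f.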
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ SmallVector<LoadInst *, 4> Loads;
+ SmallVector<StoreInst *, 4> Stores;
+ for (auto *U : GV->users()) {
+ if (Operator::getOpcode(U) == Instruction::BitCast) {
+ for (auto *UU : U->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(UU))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(UU))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+ continue;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ assert(I->getParent()->getParent() == F);
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ Loads.push_back(LI);
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ Stores.push_back(SI);
+ else
+ return false;
+ }
+
+ // We have identified all uses of GV into loads and stores. Now check if all
+ // of them are known not to depend on the value of the global at the function
+ // entry point. We do this by ensuring that every load is dominated by at
+ // least one store.
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
+ .getDomTree();
+
+ // The below check is quadratic. Check we're not going to do too many tests.
+ // FIXME: Even though this will always have worst-case quadratic time, we
+ // could put effort into minimizing the average time by putting stores that
+ // have been shown to dominate at least one load at the beginning of the
+ // Stores array, making subsequent dominance checks more likely to succeed
+ // early.
+ //
+ // The threshold here is fairly large because global->local demotion is a
+ // very powerful optimization should it fire.
+ const unsigned Threshold = 100;
+ if (Loads.size() * Stores.size() > Threshold)
+ return false;
+
+ for (auto *L : Loads) {
+ auto *LTy = L->getType();
+ if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) {
+ auto *STy = S->getValueOperand()->getType();
+ // The load is only dominated by the store if DomTree says so
+ // and the number of bits loaded in L is less than or equal to
+ // the number of bits stored in S.
+ return DT.dominates(S, L) &&
+ DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
+ }))
+ return false;
+ }
+ // All loads have known dependences inside F, so the global can be localized.
+ return true;
+}
+
+/// C may have non-instruction users. Can all of those users be turned into
+/// instructions?
+static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
+ // We don't do this exhaustively. The most common pattern that we really need
+ // to care about is a constant GEP or constant bitcast - so just looking
+ // through one single ConstantExpr.
+ //
+ // The set of constants that this function returns true for must be able to be
+ // handled by makeAllConstantUsesInstructions.
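+  //
+  // Illustrative case: a use like the constant expression
+  //   getelementptr (i32, i32* @g, i64 1)
+  // feeding a load can later be rematerialized as a GEP instruction right
+  // before that load, whereas a ConstantExpr nested inside another Constant
+  // is rejected here.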
+ for (auto *U : C->users()) {
+ if (isa<Instruction>(U))
+ continue;
+ if (!isa<ConstantExpr>(U))
+      // Non-instruction, non-constantexpr user; cannot convert this.
+ return false;
+ for (auto *UU : U->users())
+ if (!isa<Instruction>(UU))
+        // A constantexpr used by another constant. We don't try to recurse any
+        // further; we just bail out at this point.
+ return false;
+ }
+
+ return true;
+}
+
+/// C may have non-instruction users, and
+/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
+/// non-instruction users to instructions.
+static void makeAllConstantUsesInstructions(Constant *C) {
+ SmallVector<ConstantExpr*,4> Users;
+ for (auto *U : C->users()) {
+ if (isa<ConstantExpr>(U))
+ Users.push_back(cast<ConstantExpr>(U));
+ else
+ // We should never get here; allNonInstructionUsersCanBeMadeInstructions
+ // should not have returned true for C.
+ assert(
+ isa<Instruction>(U) &&
+ "Can't transform non-constantexpr non-instruction to instruction!");
+ }
+
+ SmallVector<Value*,4> UUsers;
+ for (auto *U : Users) {
+ UUsers.clear();
+ for (auto *UU : U->users())
+ UUsers.push_back(UU);
+ for (auto *UU : UUsers) {
+ Instruction *UI = cast<Instruction>(UU);
+ Instruction *NewU = U->getAsInstruction();
+ NewU->insertBefore(UI);
+ UI->replaceUsesOfWith(U, NewU);
+ }
+ U->dropAllReferences();
+ }
+}
+
+/// Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
+ const GlobalStatus &GS) {
+ auto &DL = GV->getParent()->getDataLayout();
+ // If this is a first class global and has only one accessing function and
+ // this function is non-recursive, we replace the global with a local alloca
+ // in this function.
+ //
+ // NOTE: It doesn't make sense to promote non-single-value types since we
+  // are just replacing static memory with stack memory.
+ //
+  // If the global is in a different address space, don't bring it to the stack.
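+  //
+  // Illustrative sketch of the rewrite (hypothetical IR): for
+  //   @g = internal global i32 7   ; accessed only from non-recursive @f
+  // we emit, at the top of @f's entry block,
+  //   %g = alloca i32
+  //   store i32 7, i32* %g
+  // and replace all uses of @g with the alloca.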
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GV->getType()->getAddressSpace() == 0 &&
+ !GV->isExternallyInitialized() &&
+ allNonInstructionUsersCanBeMadeInstructions(GV) &&
+ GS.AccessingFunction->doesNotRecurse() &&
+      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV)) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
+ Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ Type *ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+ GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ makeAllConstantUsesInstructions(GV);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.IsLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
+
+ bool Changed;
+ if (isLeakCheckerRoot(GV)) {
+ // Delete any constant stores to the global.
+ Changed = CleanupPointerRootUsers(GV, TLI);
+ } else {
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+ }
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ ++NumDeleted;
+ Changed = true;
+ }
+ return Changed;
+
+ } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
+ GV->setConstant(true);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (SRAGlobal(GV, DL))
+ return true;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+ ++NumSubstitute;
+ return true;
+ }
+
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
+ return true;
+
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
+ if (GS.Ordering == NotAtomic) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Walk all of the direct calls of the specified function, changing them to
+/// FastCC.
+static void ChangeCalleesToFastCall(Function *F) {
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
+ continue;
+ CallSite CS(cast<Instruction>(U));
+ CS.setCallingConv(CallingConv::Fast);
+ }
+}
+
+static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ unsigned Index = Attrs.getSlotIndex(i);
+ if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
+ continue;
+
+ // There can be only one.
+ return Attrs.removeAttribute(C, Index, Attribute::Nest);
+ }
+
+ return Attrs;
+}
+
+static void RemoveNestAttribute(Function *F) {
+ F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
+ continue;
+ CallSite CS(cast<Instruction>(U));
+ CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
+ }
+}
+
+/// Return true if this is a calling convention that we'd like to change. The
+/// idea here is that we don't want to mess with the convention if the user
+/// explicitly requested something with performance implications like coldcc,
+/// GHC, or anyregcc.
+static bool isProfitableToMakeFastCC(Function *F) {
+ CallingConv::ID CC = F->getCallingConv();
+ // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
+ return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
+}
+
+bool GlobalOpt::OptimizeFunctions(Module &M) {
+ bool Changed = false;
+ // Optimize functions.
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
+ Function *F = &*FI++;
+ // Functions without names cannot be referenced outside this module.
+ if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
+ F->setLinkage(GlobalValue::InternalLinkage);
+
+ if (deleteIfDead(*F)) {
+ Changed = true;
+ continue;
+ }
+
+ Changed |= processGlobal(*F);
+
+ if (!F->hasLocalLinkage())
+ continue;
+ if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
+ !F->hasAddressTaken()) {
+ // If this function has a calling convention worth changing, is not a
+ // varargs function, and is only called directly, promote it to use the
+ // Fast calling convention.
+ F->setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(F);
+ ++NumFastCallFns;
+ Changed = true;
+ }
+
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F->hasAddressTaken()) {
+ // The function is not used by a trampoline intrinsic, so it is safe
+ // to remove the 'nest' attribute.
+ RemoveNestAttribute(F);
+ ++NumNestRemoved;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+ bool Changed = false;
+
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = &*GVI++;
+ // Global variables without names cannot be referenced outside this module.
+ if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ // Simplify the initializer.
+ if (GV->hasInitializer())
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ auto &DL = M.getDataLayout();
+ Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (New && New != CE)
+ GV->setInitializer(New);
+ }
+
+ if (deleteIfDead(*GV)) {
+ Changed = true;
+ continue;
+ }
+
+ Changed |= processGlobal(*GV);
+ }
+ return Changed;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
+
+/// Return true if the specified constant can be handled by the code generator.
+/// We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // Simple global addresses are supported, do not allow dllimport or
+ // thread-local globals.
+ if (auto *GV = dyn_cast<GlobalValue>(C))
+ return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
+
+ // Simple integer, undef, constant aggregate zero, etc are all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ for (Value *Op : C->operands())
+ if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
+ return false;
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C).second)
+ return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
+}
+
+
+/// Return true if this constant is simple enough for us to understand. In
+/// particular, if it is a cast other than from one pointer type to another
+/// pointer type, we punt. We basically just support direct accesses to
+/// globals and GEP's of globals. This should be kept up to date with
+/// CommitValueTo.
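+/// Illustrative examples (hypothetical): a store directly to @g commits by
+/// replacing @g's initializer; a store through
+///   getelementptr inbounds ([4 x i32], [4 x i32]* @g, i32 0, i32 2)
+/// commits via EvaluateStoreInto; a weak_odr global or a GEP that
+/// over-indexes its static array bounds is rejected.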
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
+ return false;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ // Do not allow weak/*_odr/linkonce linkage or external globals.
+ return GV->hasUniqueInitializer();
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ // Handle a constantexpr gep.
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0)) &&
+ cast<GEPOperator>(CE)->isInBounds()) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ if (!GV->hasUniqueInitializer())
+ return false;
+
+ // The first index must be zero.
+ ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
+ if (!CI || !CI->isZero()) return false;
+
+ // The remaining indices must be compile-time known integers within the
+ // notional bounds of the corresponding static array types.
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
+ }
+ }
+
+ return false;
+}
+
+/// Evaluate a piece of a constantexpr store into a global initializer. This
+/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
+/// GEP operands of Addr [0, OpNo) have been stepped into.
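+///
+/// Worked example (illustrative): storing i32 5 through
+///   getelementptr ({ i32, i32 }, { i32, i32 }* @g, i32 0, i32 1)
+/// with Init = { i32 1, i32 2 } starts at OpNo == 2, replaces element 1,
+/// and rebuilds the initializer as { i32 1, i32 5 }.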
+static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
+ ConstantExpr *Addr, unsigned OpNo) {
+ // Base case of the recursion.
+ if (OpNo == Addr->getNumOperands()) {
+ assert(Val->getType() == Init->getType() && "Type mismatch!");
+ return Val;
+ }
+
+ SmallVector<Constant*, 32> Elts;
+ if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
+ // Break up the constant into its elements.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
+
+ // Replace the element that we are supposed to.
+ ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
+ unsigned Idx = CU->getZExtValue();
+ assert(Idx < STy->getNumElements() && "Struct index out of range!");
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
+
+ // Return the modified struct.
+ return ConstantStruct::get(STy, Elts);
+ }
+
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ SequentialType *InitTy = cast<SequentialType>(Init->getType());
+
+ uint64_t NumElts;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = InitTy->getVectorNumElements();
+
+ // Break up the array into elements.
+ for (uint64_t i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
+
+ assert(CI->getZExtValue() < NumElts);
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+
+ if (Init->getType()->isArrayTy())
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ return ConstantVector::get(Elts);
+}
+
+/// We have decided that Addr (which satisfies the predicate
+/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
+static void CommitValueTo(Constant *Val, Constant *Addr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ assert(GV->hasInitializer());
+ GV->setInitializer(Val);
+ return;
+ }
+
+ ConstantExpr *CE = cast<ConstantExpr>(Addr);
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
+}
+
+namespace {
+
+/// This class evaluates LLVM IR, producing the Constant representing each SSA
+/// instruction. Changes to global variables are stored in a mapping that can
+/// be iterated over after the evaluation is complete. Once an evaluation call
+/// fails, the evaluation object should not be reused.
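+///
+/// The driving sequence (as used by EvaluateStaticConstructor below) is:
+/// construct an Evaluator, call EvaluateFunction on the constructor, and on
+/// success commit each getMutatedMemory() entry to its global and mark each
+/// getInvariants() global constant.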
+class Evaluator {
+public:
+ Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
+ : DL(DL), TLI(TLI) {
+ ValueStack.emplace_back();
+ }
+
+ ~Evaluator() {
+ for (auto &Tmp : AllocaTmps)
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ }
+
+ /// Evaluate a call to function F, returning true if successful, false if we
+  /// can't evaluate it. ActualArgs contains the actual argument values for
+  /// the function.
+ bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs);
+
+ /// Evaluate all instructions in block BB, returning true if successful, false
+  /// if we can't evaluate it. NextBB returns the next BB that control flows
+ /// into, or null upon return.
+ bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
+
+ Constant *getVal(Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ValueStack.back().lookup(V);
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+ }
+
+ void setVal(Value *V, Constant *C) {
+ ValueStack.back()[V] = C;
+ }
+
+ const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
+ return MutatedMemory;
+ }
+
+ const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const {
+ return Invariants;
+ }
+
+private:
+ Constant *ComputeLoadResult(Constant *P);
+
+ /// As we compute SSA register values, we store their contents here. The back
+ /// of the deque contains the current function and the stack contains the
+ /// values in the calling frames.
+ std::deque<DenseMap<Value*, Constant*>> ValueStack;
+
+ /// This is used to detect recursion. In pathological situations we could hit
+ /// exponential behavior, but at least there is nothing unbounded.
+ SmallVector<Function*, 4> CallStack;
+
+ /// For each store we execute, we update this map. Loads check this to get
+ /// the most up-to-date value. If evaluation is successful, this state is
+ /// committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// To 'execute' an alloca, we create a temporary global variable to represent
+ /// its body. This vector is needed so we can delete the temporary globals
+ /// when we are done.
+ SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
+
+ /// These global variables have been marked invariant by the static
+ /// constructor.
+ SmallPtrSet<GlobalVariable*, 8> Invariants;
+
+ /// These are constants we have checked and know to be simple enough to live
+ /// in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
+ const DataLayout &DL;
+ const TargetLibraryInfo *TLI;
+};
+
+} // anonymous namespace
+
+/// Return the value that would be computed by a load from P after the stores
+/// reflected by 'memory' have been performed. If we can't decide, return null.
+Constant *Evaluator::ComputeLoadResult(Constant *P) {
+ // If this memory location has been recently stored, use the stored value: it
+ // is the most up-to-date.
+ DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
+ if (I != MutatedMemory.end()) return I->second;
+
+ // Access it.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+ return nullptr;
+ }
+
+ // Handle a constantexpr getelementptr.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ if (GV->hasDefinitiveInitializer())
+ return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+ }
+
+ return nullptr; // don't know how to evaluate.
+}
+
+/// Evaluate all instructions in block BB, returning true if successful, false
+/// if we can't evaluate it. NextBB returns the next BB that control flows into,
+/// or null upon return.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
+ BasicBlock *&NextBB) {
+ // This is the main evaluation loop.
+ while (1) {
+ Constant *InstResult = nullptr;
+
+ DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
+ if (!SI->isSimple()) {
+ DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+ Constant *Ptr = getVal(SI->getOperand(1));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+ if (!isSimpleEnoughPointerToCommit(Ptr)) {
+ // If this is too complex for us to commit, reject it.
+ DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
+ return false;
+ }
+
+ Constant *Val = getVal(SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
+ DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+ << "\n");
+ return false;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from NewTy to Val's type must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+
+ DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
+ }
+ }
+
+ MutatedMemory[Ptr] = Val;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
+ InstResult = ConstantExpr::get(BO->getOpcode(),
+ getVal(BO->getOperand(0)),
+ getVal(BO->getOperand(1)));
+ DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
+ InstResult = ConstantExpr::getCompare(CI->getPredicate(),
+ getVal(CI->getOperand(0)),
+ getVal(CI->getOperand(1)));
+ DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
+ InstResult = ConstantExpr::getCast(CI->getOpcode(),
+ getVal(CI->getOperand(0)),
+ CI->getType());
+ DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
+ InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
+ getVal(SI->getOperand(1)),
+ getVal(SI->getOperand(2)));
+ DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getExtractValue(
+ getVal(EVI->getAggregateOperand()), EVI->getIndices());
+ DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getInsertValue(
+ getVal(IVI->getAggregateOperand()),
+ getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
+ DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
+ Constant *P = getVal(GEP->getOperand(0));
+ SmallVector<Constant*, 8> GEPOps;
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(*i));
+ InstResult =
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
+ DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+ << "\n");
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
+
+ if (!LI->isSimple()) {
+ DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+
+ Constant *Ptr = getVal(LI->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: " << *Ptr << "\n");
+ }
+ InstResult = ComputeLoadResult(Ptr);
+ if (!InstResult) {
+ DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
+ if (AI->isArrayAllocation()) {
+ DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
+ Type *Ty = AI->getType()->getElementType();
+ AllocaTmps.push_back(
+ make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+ UndefValue::get(Ty), AI->getName()));
+ InstResult = AllocaTmps.back().get();
+ DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallSite CS(&*CurInst);
+
+ // Debug info can safely be ignored here.
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ DEBUG(dbgs() << "Ignoring debug info.\n");
+ ++CurInst;
+ continue;
+ }
+
+ // Cannot handle inline asm.
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) {
+ DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+ "intrinsic.\n");
+ return false;
+ }
+ Constant *Ptr = getVal(MSI->getDest());
+ Constant *Val = getVal(MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ ++CurInst;
+ continue;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+ // We don't insert an entry into Values, as it doesn't have a
+ // meaningful return value.
+ if (!II->use_empty()) {
+ DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
+ return false;
+ }
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
+ if (!Size->isAllOnesValue() &&
+ Size->getValue().getLimitedValue() >=
+ DL.getTypeStoreSize(ElemTy)) {
+ Invariants.insert(GV);
+ DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+ << "\n");
+ } else {
+ DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
+ }
+ // Continue even if we do nothing.
+ ++CurInst;
+ continue;
+ } else if (II->getIntrinsicID() == Intrinsic::assume) {
+ DEBUG(dbgs() << "Skipping assume intrinsic.\n");
+ ++CurInst;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
+ return false;
+ }
+
+ // Resolve function pointers.
+ Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ if (!Callee || Callee->mayBeOverridden()) {
+ DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ return false; // Cannot resolve.
+ }
+
+ SmallVector<Constant*, 8> Formals;
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
+ Formals.push_back(getVal(*i));
+
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
+ InstResult = C;
+ DEBUG(dbgs() << "Constant folded function call. Result: " <<
+ *InstResult << "\n");
+ } else {
+ DEBUG(dbgs() << "Can not constant fold function call.\n");
+ return false;
+ }
+ } else {
+ if (Callee->getFunctionType()->isVarArg()) {
+ DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
+ return false;
+ }
+
+ Constant *RetVal = nullptr;
+ // Execute the call, if successful, use the return value.
+ ValueStack.emplace_back();
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ DEBUG(dbgs() << "Failed to evaluate function.\n");
+ return false;
+ }
+ ValueStack.pop_back();
+ InstResult = RetVal;
+
+ if (InstResult) {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
+ InstResult << "\n\n");
+ } else {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ }
+ }
+ } else if (isa<TerminatorInst>(CurInst)) {
+ DEBUG(dbgs() << "Found a terminator instruction.\n");
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+ if (BI->isUnconditional()) {
+ NextBB = BI->getSuccessor(0);
+ } else {
+ ConstantInt *Cond =
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
+ if (!Cond) return false; // Cannot determine.
+
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+ ConstantInt *Val =
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
+ if (!Val) return false; // Cannot determine.
+ NextBB = SI->findCaseValue(Val).getCaseSuccessor();
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
+ NextBB = BA->getBasicBlock();
+ else
+ return false; // Cannot determine.
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = nullptr;
+ } else {
+ // invoke, unwind, resume, unreachable.
+ DEBUG(dbgs() << "Can not handle terminator.");
+ return false; // Cannot handle this terminator.
+ }
+
+ // We succeeded at evaluating this block!
+ DEBUG(dbgs() << "Successfully evaluated block.\n");
+ return true;
+ } else {
+ // Did not know how to evaluate this!
+ DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
+ return false;
+ }
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
+
+ setVal(&*CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
+ return true;
+ }
+
+ // Advance program counter.
+ ++CurInst;
+ }
+}
+
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the actual argument values for
+/// the function.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
+
+ CallStack.push_back(F);
+
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ setVal(&*AI, ActualArgs[ArgNo]);
+
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we can only evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = &F->front();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (!NextBB) {
+ // Successfully running until there's no next block means that we found
+      // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB).second)
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = nullptr;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
+
+/// Evaluate static constructors in the function, if we can. Return true if we
+/// can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ // Call the function.
+ Evaluator Eval(DL, TLI);
+ Constant *RetValDummy;
+ bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>());
+
+ if (EvalSuccess) {
+ ++NumCtorsEvaluated;
+
+ // We succeeded at evaluation: commit the result.
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to " << Eval.getMutatedMemory().size()
+ << " stores.\n");
+ for (DenseMap<Constant*, Constant*>::const_iterator I =
+ Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
+ I != E; ++I)
+ CommitValueTo(I->second, I->first);
+ for (GlobalVariable *GV : Eval.getInvariants())
+ GV->setConstant(true);
+ }
+
+ return EvalSuccess;
+}
+
+static int compareNames(Constant *const *A, Constant *const *B) {
+ return (*A)->stripPointerCasts()->getName().compare(
+ (*B)->stripPointerCasts()->getName());
+}
+
+static void setUsedInitializer(GlobalVariable &V,
+ const SmallPtrSet<GlobalValue *, 8> &Init) {
+ if (Init.empty()) {
+ V.eraseFromParent();
+ return;
+ }
+
+ // Type of pointer to the array of pointers.
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
+
+ SmallVector<llvm::Constant *, 8> UsedArray;
+ for (GlobalValue *GV : Init) {
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
+ UsedArray.push_back(Cast);
+ }
+ // Sort to get deterministic order.
+ array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
+ ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
+
+ Module *M = V.getParent();
+ V.removeFromParent();
+ GlobalVariable *NV =
+ new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, UsedArray), "");
+ NV->takeName(&V);
+ NV->setSection("llvm.metadata");
+ delete &V;
+}
+
+namespace {
+/// An easy to access representation of llvm.used and llvm.compiler.used.
+class LLVMUsed {
+ SmallPtrSet<GlobalValue *, 8> Used;
+ SmallPtrSet<GlobalValue *, 8> CompilerUsed;
+ GlobalVariable *UsedV;
+ GlobalVariable *CompilerUsedV;
+
+public:
+ LLVMUsed(Module &M) {
+ UsedV = collectUsedGlobalVariables(M, Used, false);
+ CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
+ }
+ typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
+ typedef iterator_range<iterator> used_iterator_range;
+ iterator usedBegin() { return Used.begin(); }
+ iterator usedEnd() { return Used.end(); }
+ used_iterator_range used() {
+ return used_iterator_range(usedBegin(), usedEnd());
+ }
+ iterator compilerUsedBegin() { return CompilerUsed.begin(); }
+ iterator compilerUsedEnd() { return CompilerUsed.end(); }
+ used_iterator_range compilerUsed() {
+ return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
+ }
+ bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
+ bool compilerUsedCount(GlobalValue *GV) const {
+ return CompilerUsed.count(GV);
+ }
+ bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
+ bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
+ bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
+ bool compilerUsedInsert(GlobalValue *GV) {
+ return CompilerUsed.insert(GV).second;
+ }
+
+ void syncVariablesAndSets() {
+ if (UsedV)
+ setUsedInitializer(*UsedV, Used);
+ if (CompilerUsedV)
+ setUsedInitializer(*CompilerUsedV, CompilerUsed);
+ }
+};
+}
+
+static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
+ if (GA.use_empty()) // No use at all.
+ return false;
+
+ assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
+ "We should have removed the duplicated "
+ "element from llvm.compiler.used");
+ if (!GA.hasOneUse())
+    // Strictly more than one use. So at least one use is not in llvm.used or
+    // llvm.compiler.used.
+ return true;
+
+ // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
+ return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
+}
+
+static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
+ const LLVMUsed &U) {
+ unsigned N = 2;
+ assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
+ "We should have removed the duplicated "
+ "element from llvm.compiler.used");
+ if (U.usedCount(&V) || U.compilerUsedCount(&V))
+ ++N;
+ return V.hasNUsesOrMore(N);
+}
+
+static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
+ if (!GA.hasLocalLinkage())
+ return true;
+
+ return U.usedCount(&GA) || U.compilerUsedCount(&GA);
+}
+
+static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
+ bool &RenameTarget) {
+ RenameTarget = false;
+ bool Ret = false;
+ if (hasUseOtherThanLLVMUsed(GA, U))
+ Ret = true;
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!mayHaveOtherReferences(GA, U))
+ return Ret;
+
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ Constant *Aliasee = GA.getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ if (!Target->hasLocalLinkage())
+ return Ret;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
+ return Ret;
+
+ RenameTarget = true;
+ return true;
+}
+
+bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+ bool Changed = false;
+ LLVMUsed Used(M);
+
+ for (GlobalValue *GV : Used.used())
+ Used.compilerUsedErase(GV);
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E;) {
+ GlobalAlias *J = &*I++;
+
+ // Aliases without names cannot be referenced outside this module.
+ if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
+ J->setLinkage(GlobalValue::InternalLinkage);
+
+ if (deleteIfDead(*J)) {
+ Changed = true;
+ continue;
+ }
+
+ // If the aliasee may change at link time, nothing can be done - bail out.
+ if (J->mayBeOverridden())
+ continue;
+
+ Constant *Aliasee = J->getAliasee();
+ GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
+ // We can't trivially replace the alias with the aliasee if the aliasee is
+ // non-trivial in some way.
+ // TODO: Try to handle non-zero GEPs of local aliasees.
+ if (!Target)
+ continue;
+ Target->removeDeadConstantUsers();
+
+ // Make all users of the alias use the aliasee instead.
+ bool RenameTarget;
+ if (!hasUsesToReplace(*J, Used, RenameTarget))
+ continue;
+
+ J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
+ ++NumAliasesResolved;
+ Changed = true;
+
+ if (RenameTarget) {
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(&*J);
+ Target->setLinkage(J->getLinkage());
+ Target->setVisibility(J->getVisibility());
+ Target->setDLLStorageClass(J->getDLLStorageClass());
+
+ if (Used.usedErase(&*J))
+ Used.usedInsert(Target);
+
+ if (Used.compilerUsedErase(&*J))
+ Used.compilerUsedInsert(Target);
+ } else if (mayHaveOtherReferences(*J, Used))
+ continue;
+
+ // Delete the alias.
+ M.getAliasList().erase(J);
+ ++NumAliasesRemoved;
+ Changed = true;
+ }
+
+ Used.syncVariablesAndSets();
+
+ return Changed;
+}
+
+static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::cxa_atexit))
+ return nullptr;
+
+ Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
+
+ if (!Fn)
+ return nullptr;
+
+ FunctionType *FTy = Fn->getFunctionType();
+
+ // Checking that the function has the right return type, the right number of
+ // parameters and that they all have pointer types should be enough.
+ if (!FTy->getReturnType()->isIntegerTy() ||
+ FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return nullptr;
+
+ return Fn;
+}
+
+/// Returns whether the given function is an empty C++ destructor and can
+/// therefore be eliminated.
+/// Note that we assume that other optimization passes have already simplified
+/// the code so we only look for a function with a single basic block, where
+/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
+/// other side-effect free instructions.
+static bool cxxDtorIsEmpty(const Function &Fn,
+ SmallPtrSet<const Function *, 8> &CalledFunctions) {
+ // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
+ // nounwind, but that doesn't seem worth doing.
+ if (Fn.isDeclaration())
+ return false;
+
+ if (++Fn.begin() != Fn.end())
+ return false;
+
+ const BasicBlock &EntryBlock = Fn.getEntryBlock();
+ for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
+ I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore debug intrinsics.
+ if (isa<DbgInfoIntrinsic>(CI))
+ continue;
+
+ const Function *CalledFn = CI->getCalledFunction();
+
+ if (!CalledFn)
+ return false;
+
+ SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
+
+ // Don't treat recursive functions as empty.
+ if (!NewCalledFunctions.insert(CalledFn).second)
+ return false;
+
+ if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
+ return false;
+ } else if (isa<ReturnInst>(*I))
+ return true; // We're done.
+ else if (I->mayHaveSideEffects())
+ return false; // Destructor with side effects, bail.
+ }
+
+ return false;
+}
+
+bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+ /// Itanium C++ ABI p3.3.5:
+ ///
+ /// After constructing a global (or local static) object, that will require
+ /// destruction on exit, a termination function is registered as follows:
+ ///
+ /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
+ ///
+ /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
+ /// call f(p) when DSO d is unloaded, before all such termination calls
+ /// registered before this one. It returns zero if registration is
+ /// successful, nonzero on failure.
+
+ // This pass will look for calls to __cxa_atexit where the function is trivial
+ // and remove them.
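+  //
+  // Illustrative call this loop can delete (hypothetical IR), assuming
+  // @_ZN1AD1Ev is an empty destructor:
+  //   call i32 @__cxa_atexit(void (i8*)* @_ZN1AD1Ev, i8* @a, i8* @__dso_handle)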
+ bool Changed = false;
+
+ for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
+ I != E;) {
+ // We're only interested in calls. Theoretically, we could handle invoke
+ // instructions as well, but neither llvm-gcc nor clang generate invokes
+ // to __cxa_atexit.
+ CallInst *CI = dyn_cast<CallInst>(*I++);
+ if (!CI)
+ continue;
+
+ Function *DtorFn =
+ dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
+ if (!DtorFn)
+ continue;
+
+ SmallPtrSet<const Function *, 8> CalledFunctions;
+ if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+ continue;
+
+ // Just remove the call.
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ CI->eraseFromParent();
+
+ ++NumCXXDtorsRemoved;
+
+ Changed |= true;
+ }
+
+ return Changed;
+}
+
+bool GlobalOpt::runOnModule(Module &M) {
+ bool Changed = false;
+
+ auto &DL = M.getDataLayout();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ NotDiscardableComdats.clear();
+ for (const GlobalVariable &GV : M.globals())
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.isDiscardableIfUnused() || !GV.use_empty())
+ NotDiscardableComdats.insert(C);
+ for (Function &F : M)
+ if (const Comdat *C = F.getComdat())
+ if (!F.isDefTriviallyDead())
+ NotDiscardableComdats.insert(C);
+ for (GlobalAlias &GA : M.aliases())
+ if (const Comdat *C = GA.getComdat())
+ if (!GA.isDiscardableIfUnused() || !GA.use_empty())
+ NotDiscardableComdats.insert(C);
+
+ // Delete functions that are trivially dead, ccc -> fastcc
+ LocalChange |= OptimizeFunctions(M);
+
+ // Optimize global_ctors list.
+ LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
+ return EvaluateStaticConstructor(F, DL, TLI);
+ });
+
+ // Optimize non-address-taken globals.
+ LocalChange |= OptimizeGlobalVars(M);
+
+ // Resolve aliases, when possible.
+ LocalChange |= OptimizeGlobalAliases(M);
+
+ // Try to remove trivial global destructors if they are not removed
+ // already.
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
+ if (CXAAtExitFn)
+ LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+
+ Changed |= LocalChange;
+ }
+
+ // TODO: Move all global ctors functions to the end of the module for code
+ // layout.
+
+ return Changed;
+}
+
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
new file mode 100644
index 0000000..af541d1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -0,0 +1,279 @@
+//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an _extremely_ simple interprocedural constant
+// propagation pass. It could certainly be improved in many different ways,
+// like using a worklist. This pass makes arguments dead, but does not remove
+// them. The existing dead argument elimination pass should be run after this
+// to clean up the mess.
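+//
+// For example (illustrative): if every call site of an internal function
+// passes i32 42 for some argument, every use of that argument inside the
+// function is replaced with 42; the now-dead argument itself remains.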
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ipconstprop"
+
+STATISTIC(NumArgumentsProped, "Number of args turned into constants");
+STATISTIC(NumReturnValProped, "Number of return values turned into constants");
+
+namespace {
+ /// IPCP - The interprocedural constant propagation pass
+ ///
+ struct IPCP : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+ private:
+ bool PropagateConstantsIntoArguments(Function &F);
+ bool PropagateConstantReturn(Function &F);
+ };
+}
+
+char IPCP::ID = 0;
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false)
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration()) {
+        // Delete any dead constant users (klingons).
+ I->removeDeadConstantUsers();
+ if (I->hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(*I);
+ Changed |= PropagateConstantReturn(*I);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
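Why iterating to a fixpoint matters: one propagation can enable another. A hypothetical chain (function names invented):

```cpp
// PropagateConstantReturn rewrites the use of g() to the constant 7; once
// that happens, the only call to f passes a constant argument, so a later
// sweep lets PropagateConstantsIntoArguments rewrite x inside f to 7.
static int g() { return 7; }
static int f(int x) { return x + 1; }
int use() { return f(g()); }
```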
+
+/// PropagateConstantsIntoArguments - Look at all uses of the specified
+/// function. If all uses are direct call sites, and all pass a particular
+/// constant in for an argument, propagate that constant in as the argument.
+///
+bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+  // No arguments or no uses? Early exit.
+  if (F.arg_empty() || F.use_empty()) return false;
+
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is driven to true when found to be non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ ArgumentConstants.resize(F.arg_size());
+
+ unsigned NumNonconstant = 0;
+ for (Use &U : F.uses()) {
+ User *UR = U.getUser();
+ // Ignore blockaddress uses.
+ if (isa<BlockAddress>(UR)) continue;
+
+    // If the function is used by anything other than a call or invoke
+    // instruction, or not as the callee of the call site, do not transform.
+ if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
+ return false;
+
+ CallSite CS(cast<Instruction>(UR));
+ if (!CS.isCallee(&U))
+ return false;
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == nullptr) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
+ }
+ }
+ }
+
+ // If we got to this point, there is a constant argument!
+ assert(NumNonconstant != ArgumentConstants.size());
+ bool MadeChange = false;
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty() ||
+ AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (!V) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
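For intuition, a hypothetical source-level pattern (not from this tree) that satisfies the conditions above: an internal function whose every direct caller passes the same constant.

```cpp
// 'scale' has internal linkage and every call site passes 42, so all uses
// of 'k' in its body can be replaced with the constant 42.
static int scale(int k) { return k * 2; }
int a() { return scale(42); }
int b() { return scale(42); }
```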
+
+
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
+bool IPCP::PropagateConstantReturn(Function &F) {
+ if (F.getReturnType()->isVoidTy())
+ return false; // No return value.
+
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.mayBeOverridden())
+ return false;
+
+ // Check to see if this function returns a constant.
+ SmallVector<Value *,4> RetVals;
+ StructType *STy = dyn_cast<StructType>(F.getReturnType());
+ if (STy)
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
+ else
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+
+ unsigned NumNonConstant = 0;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy)
+ V = RI->getOperand(0);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs, we can change them into anything
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
+ }
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
+ }
+ }
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = nullptr;
+ // All values non-constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
+ }
+ }
+
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Use &U : F.uses()) {
+ CallSite CS(U.getUser());
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || !CS.isCallee(&U))
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (!STy) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) {
+ Instruction *Ins = cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ if (MadeChange) ++NumReturnValProped;
+ return MadeChange;
+}
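A hypothetical source-level illustration of both cases handled here, the constant return and the argument pass-through:

```cpp
// 'answer' always returns the constant 42, so callers may use 42 directly;
// 'id' always returns its argument, so each caller may reuse the value it
// passed in instead of the call's result.
static int answer() { return 42; }
static int id(int x) { return x; }
int caller(int v) { return id(v) + answer(); }
```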
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
new file mode 100644
index 0000000..7ea6c08
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -0,0 +1,118 @@
+//===-- IPO.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeCrossDSOCFIPass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeForceFunctionAttrsLegacyPassPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInferFunctionAttrsLegacyPassPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeLowerBitSetsPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesLegacyPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+ initializeBarrierNoopPass(Registry);
+ initializeEliminateAvailableExternallyPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
+ initializeFunctionImportPassPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createArgumentPromotionPass());
+}
+
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createConstantMergePass());
+}
+
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createDeadArgEliminationPass());
+}
+
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionAttrsPass());
+}
+
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createFunctionInliningPass());
+}
+
+void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(llvm::createAlwaysInlinerPass());
+}
+
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalDCEPass());
+}
+
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createGlobalOptimizerPass());
+}
+
+void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPConstantPropagationPass());
+}
+
+void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPruneEHPass());
+}
+
+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+ std::vector<const char *> Export;
+ if (AllButMain)
+ Export.push_back("main");
+ unwrap(PM)->add(createInternalizePass(Export));
+}
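A minimal sketch of driving this binding from the C API; `LLVMCreatePassManager`, `LLVMRunPassManager`, and `LLVMDisposePassManager` are the standard `llvm-c/Core.h` entry points, and the wrapper function name is invented:

```cpp
#include "llvm-c/Core.h"
#include "llvm-c/Transforms/IPO.h"

/* Internalize every symbol except "main", then run the pipeline on M. */
void internalizeAllButMain(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddInternalizePass(PM, /*AllButMain=*/1);
  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}
```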
+
+void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripDeadPrototypesPass());
+}
+
+void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createStripSymbolsPass());
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
new file mode 100644
index 0000000..d02c861
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -0,0 +1,937 @@
+//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inferattrs"
+
+STATISTIC(NumReadNone, "Number of functions inferred as readnone");
+STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
+STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
+STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
+STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
+STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
+
+static bool setDoesNotAccessMemory(Function &F) {
+ if (F.doesNotAccessMemory())
+ return false;
+ F.setDoesNotAccessMemory();
+ ++NumReadNone;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F) {
+ if (F.onlyReadsMemory())
+ return false;
+ F.setOnlyReadsMemory();
+ ++NumReadOnly;
+ return true;
+}
+
+static bool setDoesNotThrow(Function &F) {
+ if (F.doesNotThrow())
+ return false;
+ F.setDoesNotThrow();
+ ++NumNoUnwind;
+ return true;
+}
+
+static bool setDoesNotCapture(Function &F, unsigned n) {
+ if (F.doesNotCapture(n))
+ return false;
+ F.setDoesNotCapture(n);
+ ++NumNoCapture;
+ return true;
+}
+
+static bool setOnlyReadsMemory(Function &F, unsigned n) {
+ if (F.onlyReadsMemory(n))
+ return false;
+ F.setOnlyReadsMemory(n);
+ ++NumReadOnlyArg;
+ return true;
+}
+
+static bool setDoesNotAlias(Function &F, unsigned n) {
+ if (F.doesNotAlias(n))
+ return false;
+ F.setDoesNotAlias(n);
+ ++NumNoAlias;
+ return true;
+}
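A note on the index convention used by these helpers: index 0 names the return value and indices 1..N name the formal parameters. As a sketch (helper name invented; the `Function` methods are exactly the ones the helpers above call), the `LibFunc::strdup` case further down amounts to:

```cpp
#include "llvm/IR/Function.h"
using namespace llvm;

static void annotateStrdup(Function &F) {
  F.setDoesNotThrow();     // function-level nounwind
  F.setDoesNotAlias(0);    // index 0 = the return value: noalias
  F.setDoesNotCapture(1);  // index 1 = the first parameter: nocapture
  F.setOnlyReadsMemory(1); // first parameter: readonly
}
```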
+
+/// Analyze the name and prototype of the given function and set any applicable
+/// attributes.
+///
+/// Returns true if any attributes were set and false otherwise.
+static bool inferPrototypeAttributes(Function &F,
+ const TargetLibraryInfo &TLI) {
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc)))
+ return false;
+
+ bool Changed = false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::strcmp: // 0,1
+ case LibFunc::strspn: // 0,1
+ case LibFunc::strncmp: // 0,1
+ case LibFunc::strcspn: // 0,1
+ case LibFunc::strcoll: // 0,1
+ case LibFunc::strcasecmp: // 0,1
+  case LibFunc::strncasecmp: // 0,1
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::sscanf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::sprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::mkdir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::rewind:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::rmdir:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::rename:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F);
+ return Changed;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 3);
+ return Changed;
+ case LibFunc::fread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::fputs:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyReadsMemory(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::uname:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::unlink:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::pread:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pread" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "pwrite" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::putchar:
+ Changed |= setDoesNotThrow(F);
+ return Changed;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vsscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 3);
+ Changed |= setOnlyReadsMemory(F, 3);
+ return Changed;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAccessMemory(F);
+ return Changed;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ Changed |= setDoesNotCapture(F, 4);
+ return Changed;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setOnlyReadsMemory(F, 2);
+ return Changed;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ return Changed;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotAlias(F, 0);
+ return Changed;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ return Changed;
+ case LibFunc::gettimeofday:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // Currently some platforms have the restrict keyword on the arguments to
+ // gettimeofday. To be conservative, do not add noalias to gettimeofday's
+ // arguments.
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ return Changed;
+
+ default:
+ // FIXME: It'd be really nice to cover all the library functions we're
+ // aware of here.
+ return false;
+ }
+}
+
+static bool inferAllPrototypeAttributes(Module &M,
+ const TargetLibraryInfo &TLI) {
+ bool Changed = false;
+
+ for (Function &F : M.functions())
+ // We only infer things using the prototype if the definition isn't around
+ // to analyze directly.
+ if (F.isDeclaration())
+ Changed |= inferPrototypeAttributes(F, TLI);
+
+ return Changed;
+}
+
+PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
+ AnalysisManager<Module> *AM) {
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(M);
+
+ if (!inferAllPrototypeAttributes(M, TLI))
+ // If we didn't infer anything, preserve all analyses.
+ return PreservedAnalyses::all();
+
+ // Otherwise, we may have changed fundamental function attributes, so clear
+ // out all the passes.
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct InferFunctionAttrsLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ InferFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeInferFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return inferAllPrototypeAttributes(M, TLI);
+ }
+};
+}
+
+char InferFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs",
+ "Infer set function attributes", false, false)
+
+Pass *llvm::createInferFunctionAttrsLegacyPass() {
+ return new InferFunctionAttrsLegacyPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
new file mode 100644
index 0000000..1704bfe
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -0,0 +1,100 @@
+//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inline"
+
+namespace {
+
+/// \brief Inliner pass which only handles "always inline" functions.
+class AlwaysInliner : public Inliner {
+
+public:
+ // Use extremely low threshold.
+  // Use an extremely low threshold.
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ AlwaysInliner(bool InsertLifetime)
+ : Inliner(ID, -2000000000, InsertLifetime) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ InlineCost getInlineCost(CallSite CS) override;
+
+ using llvm::Pass::doFinalization;
+ bool doFinalization(CallGraph &CG) override {
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+ }
+};
+
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
+ return new AlwaysInliner(InsertLifetime);
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note that it would be possible to go to some lengths to cache the
+/// information computed here, but as we only expect to do this for relatively
+/// few and small functions which have the explicit attribute to force
+/// inlining, it is likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Only inline direct calls to functions with always-inline attributes
+ // that are viable for inlining. FIXME: We shouldn't even get here for
+ // declarations.
+ if (Callee && !Callee->isDeclaration() &&
+ CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
+ return InlineCost::getAlways();
+
+ return InlineCost::getNever();
+}
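A hypothetical source-level trigger for the `getAlways()` path; Clang lowers the GNU attribute below to the `alwaysinline` IR attribute:

```cpp
// 'twice' is inlined into 'use' regardless of size or cost, because the
// callee carries the always-inline attribute and is viable for inlining.
__attribute__((always_inline)) inline int twice(int x) { return x + x; }
int use(int v) { return twice(v); }
```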
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 0000000..45609f8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,110 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into their callers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inline"
+
+namespace {
+
+/// \brief Actual inliner pass implementation.
+///
+/// The common implementation of the inlining logic is shared between this
+/// inliner pass and the always inliner pass. The two passes use different cost
+/// analyses to determine when to inline.
+class SimpleInliner : public Inliner {
+
+public:
+ SimpleInliner() : Inliner(ID) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ SimpleInliner(int Threshold)
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ InlineCost getInlineCost(CallSite CS) override {
+ Function *Callee = CS.getCalledFunction();
+ TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
+ return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ TargetTransformInfoWrapperPass *TTIWP;
+};
+
+static int computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return 275;
+ if (SizeOptLevel == 1) // -Os
+ return 75;
+ if (SizeOptLevel == 2) // -Oz
+ return 25;
+ return 225;
+}
+
+} // end anonymous namespace
+
+char SimpleInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+
+Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
+
+Pass *llvm::createFunctionInliningPass(int Threshold) {
+ return new SimpleInliner(Threshold);
+}
+
+Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ return new SimpleInliner(
+ computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+}
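A minimal sketch of selecting the size-optimized threshold from a driver, assuming only the legacy pass manager (the wrapper function name is invented; `createFunctionInliningPass` is the overload defined above):

```cpp
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"

// An -Oz-style pipeline: OptLevel=2, SizeOptLevel=2 maps to threshold 25.
void addSizeInliner(llvm::legacy::PassManagerBase &PM) {
  PM.add(llvm::createFunctionInliningPass(/*OptLevel=*/2, /*SizeOptLevel=*/2));
}
```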
+
+bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+ return Inliner::runOnSCC(SCC);
+}
+
+void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ Inliner::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
new file mode 100644
index 0000000..bbe5f876
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -0,0 +1,741 @@
+//===- Inliner.cpp - Code common to all inliners --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls and updating the call graph. The decisions of which calls
+// are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+// This weirdly named statistic tracks the number of times that, when
+// attempting to inline a function A into B, we analyze B's callers to see
+// whether inlining into them would be more profitable, which would block
+// the inlining of A into B.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
+
+// We introduce this threshold to help the performance of instrumentation-based
+// PGO before we actually hook up the inliner with analysis passes such as BPI
+// and BFI.
+static cl::opt<int>
+ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
+ cl::desc("Threshold for inlining functions with cold attribute"));
+
+// Threshold to use when optsize is specified (and there is no -inline-limit).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
+}
+
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
+ : CallGraphSCCPass(ID),
+ InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
+ : Threshold),
+ InsertLifetime(InsertLifetime) {}
+
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory, bool InsertLifetime) {
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+
+ // We need to manually construct BasicAA directly in order to disable
+ // its use of other function analyses.
+ BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee));
+
+ // Construct our own AA results for this function. We do this manually to
+ // work around the limitations of the legacy pass manager.
+ AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR));
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
+ return false;
+
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+ // the C call site we don't want to do any alloca merging between X and Y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (!ATy || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
+    // set to keep track of which "available" allocas are already being used by
+    // this function. Also, AllocasForType may of course be empty!
+ bool MergedAwayAlloca = false;
+ for (AllocaInst *AvailableAlloca : AllocasForType) {
+
+ unsigned Align1 = AI->getAlignment(),
+ Align2 = AvailableAlloca->getAlignment();
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca).second)
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
+
+ // Move affected dbg.declare calls immediately after the new alloca to
+      // avoid the situation when a dbg.declare precedes its alloca.
+ if (auto *L = LocalAsMetadata::getIfExists(AI))
+ if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+ DDI->moveBefore(AvailableAlloca->getNextNode());
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+
+ if (Align1 != Align2) {
+ if (!Align1 || !Align2) {
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+ unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
+
+ Align1 = Align1 ? Align1 : TypeAlign;
+ Align2 = Align2 ? Align2 : TypeAlign;
+ }
+
+ if (Align1 > Align2)
+ AvailableAlloca->setAlignment(AI->getAlignment());
+ }
+
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = nullptr;
+ break;
+ }
+
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
+
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
+ }
+
+ return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+ int Threshold = InlineThreshold; // -inline-threshold or else selected by
+ // overall opt level
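+
+  // The adjustments below, in order: an optsize caller (absent
+  // -inline-threshold) lowers the threshold to OptSizeThreshold (75); an
+  // inlinehint or profile-hot callee may raise it to HintThreshold unless
+  // the caller is minsize; and a cold or profile-cold callee may lower it
+  // to ColdThreshold, but only when -inline-threshold is absent or
+  // -inlinecold-threshold is given.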
+
+ // If -inline-threshold is not given, listen to the optsize attribute when it
+ // would decrease the threshold.
+ Function *Caller = CS.getCaller();
+ bool OptSize = Caller && !Caller->isDeclaration() &&
+ // FIXME: Use Function::optForSize().
+ Caller->hasFnAttribute(Attribute::OptimizeForSize);
+ if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
+ OptSizeThreshold < Threshold)
+ Threshold = OptSizeThreshold;
+
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ return Threshold;
+
+ // If profile information is available, use that to adjust threshold of hot
+ // and cold functions.
+  // FIXME: The heuristics used below for determining hotness and coldness are
+ // based on preliminary SPEC tuning and may not be optimal. Replace this with
+ // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
+ uint64_t FunctionCount = 0, MaxFunctionCount = 0;
+ bool HasPGOCounts = false;
+ if (Callee->getEntryCount() &&
+ Callee->getParent()->getMaximumFunctionCount()) {
+ HasPGOCounts = true;
+ FunctionCount = Callee->getEntryCount().getValue();
+ MaxFunctionCount =
+ Callee->getParent()->getMaximumFunctionCount().getValue();
+ }
+
+ // Listen to the inlinehint attribute or profile based hotness information
+ // when it would increase the threshold and the caller does not need to
+ // minimize its size.
+ bool InlineHint =
+ Callee->hasFnAttribute(Attribute::InlineHint) ||
+ (HasPGOCounts &&
+ FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
+ if (InlineHint && HintThreshold > Threshold &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
+ Threshold = HintThreshold;
+
+ // Listen to the cold attribute or profile based coldness information
+ // when it would decrease the threshold.
+ bool ColdCallee =
+ Callee->hasFnAttribute(Attribute::Cold) ||
+ (HasPGOCounts &&
+ FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
+ // Command line argument for InlineLimit will override the default
+ // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
+ // do not use the default cold threshold even if it is smaller.
+ if ((InlineLimit.getNumOccurrences() == 0 ||
+ ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
+ ColdThreshold < Threshold)
+ Threshold = ColdThreshold;
+
+ return Threshold;
+}
+
+static void emitAnalysis(CallSite CS, const Twine &Msg) {
+ Function *Caller = CS.getCaller();
+ LLVMContext &Ctx = Caller->getContext();
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+}
+
+/// Return true if the inliner should attempt to inline at the given CallSite.
+bool Inliner::shouldInline(CallSite CS) {
+ InlineCost IC = getInlineCost(CS);
+
+ if (IC.isAlways()) {
+ DEBUG(dbgs() << " Inlining: cost=always"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
+ " should always be inlined (cost=always)");
+ return true;
+ }
+
+ if (IC.isNever()) {
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
+ << ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " should never be inlined (cost=never)"));
+ return false;
+ }
+
+ Function *Caller = CS.getCaller();
+ if (!IC) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " too costly to inline (cost=") +
+ Twine(IC.getCost()) + ", threshold=" +
+ Twine(IC.getCostDelta() + IC.getCost()) + ")");
+ return false;
+ }
+
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
+ int TotalSecondaryCost = 0;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (User *U : Caller->users()) {
+ CallSite CS2(U);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+
+ InlineCost IC2 = getInlineCost(CS2);
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+ if (IC2.isAlways())
+ continue;
+
+      // See if inlining of the original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += IC2.getCost();
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (callerWillBeRemoved && !Caller->use_empty())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
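+    // Worked example: if two outer call sites of Caller sit within
+    // CandidateCost of their thresholds, inlining Callee here would push
+    // both over; their inline costs (plus the last-call bonus when Caller
+    // would otherwise vanish) form TotalSecondaryCost, and if that total is
+    // cheaper than this single inline, we skip it so that Caller can be
+    // inlined outward instead.
+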
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << IC.getCost() <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ emitAnalysis(
+ CS, Twine("Not inlining. Cost of inlining " +
+ CS.getCalledFunction()->getName() +
+ " increases the cost of inlining " +
+ CS.getCaller()->getName() + " in other contexts"));
+ return false;
+ }
+ }
+
+ DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", Call: " << *CS.getInstruction() << '\n');
+ emitAnalysis(
+ CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
+ CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
+ " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+ return true;
+}
+
+/// Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
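+/// For example, if inlining B into A exposed a new call to B, that call
+/// site's history chain contains B; finding B here makes the inliner reject
+/// the candidate, preventing unbounded recursive inlining.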
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ SmallPtrSet<Function*, 8> SCCFunctions;
+ DEBUG(dbgs() << "Inliner visiting SCC:");
+ for (CallGraphNode *Node : SCC) {
+ Function *F = Node->getFunction();
+ if (F) SCCFunctions.insert(F);
+ DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
+ }
+
+ // Scan through and identify all call sites ahead of time so that we only
+ // inline call sites in the original functions, not call sites that result
+ // from inlining other functions.
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+ for (CallGraphNode *Node : SCC) {
+ Function *F = Node->getFunction();
+ if (!F) continue;
+
+ for (BasicBlock &BB : *F)
+ for (Instruction &I : BB) {
+ CallSite CS(cast<Value>(&I));
+ // If this isn't a call, or it is a call to an intrinsic, it can
+ // never be inlined.
+ if (!CS || isa<IntrinsicInst>(I))
+ continue;
+
+ // If this is a direct call to an external function, we can never inline
+ // it. If it is an indirect call, inlining may resolve it to be a
+ // direct call, so we keep it.
+ if (Function *Callee = CS.getCalledFunction())
+ if (Callee->isDeclaration())
+ continue;
+
+ CallSites.push_back(std::make_pair(CS, -1));
+ }
+ }
+
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
+
+  // If there are no call sites in this SCC, exit early.
+ if (CallSites.empty())
+ return false;
+
+ // Now that we have all of the call sites, move the ones to functions in the
+ // current SCC to the end of the list.
+ unsigned FirstCallInSCC = CallSites.size();
+ for (unsigned i = 0; i < FirstCallInSCC; ++i)
+ if (Function *F = CallSites[i].first.getCalledFunction())
+ if (SCCFunctions.count(F))
+ std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
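+  // (The post-decrement in the swap re-examines the element just moved into
+  // slot i, so runs of in-SCC call sites are partitioned correctly.)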
+
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+ InlineFunctionInfo InlineInfo(&CG, ACT);
+
+ // Now that we have all of the call sites, loop over them and inline them if
+ // it looks profitable to do so.
+ bool Changed = false;
+ bool LocalChange;
+ do {
+ LocalChange = false;
+    // Iterate over the outer loop because inlining functions can cause
+    // indirect calls to become direct calls.
+    // CallSites may be modified inside the loop, so a range-based for loop
+    // cannot be used.
+ for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+ CallSite CS = CallSites[CSi].first;
+
+ Function *Caller = CS.getCaller();
+ Function *Callee = CS.getCalledFunction();
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
+ DEBUG(dbgs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (!Callee || Callee->isDeclaration()) continue;
+
+        // If this call site was obtained by inlining another function, verify
+        // that the inline path for the call did not include the callee
+        // itself. If it did, we'd be recursively inlining the same function,
+        // which would produce the same call sites and cause us to inline
+        // infinitely.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+ LLVMContext &CallerCtx = Caller->getContext();
+
+        // Get the DebugLoc to report; CS will be invalid after inlining.
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
+ continue;
+ }
+
+ // Attempt to inline the function.
+ if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID, InsertLifetime)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
+ continue;
+ }
+ ++NumInlined;
+
+ // Report the inline decision.
+ emitOptimizationRemark(
+ CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
+ // If inlining this function gave us any new call sites, throw them
+ // onto our worklist to process. They are useful inline candidates.
+ if (!InlineInfo.InlinedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (Value *Ptr : InlineInfo.InlinedCalls)
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
+ }
+
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // TODO: Can remove if in SCC now.
+ !SCCFunctions.count(Callee) &&
+
+ // The function may be apparently dead, but if there are indirect
+ // callgraph references to the node, we cannot delete it yet, this
+ // could invalidate the CGSCC iterator.
+ CG[Callee]->getNumReferences() == 0) {
+ DEBUG(dbgs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+      // Remove the node for the callee from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+
+ // Remove this call site from the list. If possible, use
+ // swap/pop_back for efficiency, but do not use it if doing so would
+ // move a call site to a function in this SCC before the
+ // 'FirstCallInSCC' barrier.
+ if (SCC.isSingular()) {
+ CallSites[CSi] = CallSites.back();
+ CallSites.pop_back();
+ } else {
+ CallSites.erase(CallSites.begin()+CSi);
+ }
+ --CSi;
+
+ Changed = true;
+ LocalChange = true;
+ }
+ } while (LocalChange);
+
+ return Changed;
+}
+
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG);
+}
+
+/// Remove dead functions that are not included in the DNR (Do Not Remove)
+/// list.
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+ SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
+ SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+
+ auto RemoveCGN = [&](CallGraphNode *CGN) {
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+    // Remove any edges from the external node to the function's call graph
+    // node. These edges might have been made irrelevant by optimization of
+    // the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Remove the node for the callee from the call graph and delete it.
+ FunctionsToRemove.push_back(CGN);
+ };
+
+ // Scan for all of the functions, looking for ones that should now be removed
+ // from the program. Insert the dead ones in the FunctionsToRemove set.
+ for (const auto &I : CG) {
+ CallGraphNode *CGN = I.second.get();
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration())
+ continue;
+
+    // Handle the case where the caller only wants to consider always-inline
+    // functions. This is a bit of a hack to share code between here and the
+    // InlineAlways pass.
+ if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
+ continue;
+
+ // If the only remaining users of the function are dead constants, remove
+ // them.
+ F->removeDeadConstantUsers();
+
+ if (!F->isDefTriviallyDead())
+ continue;
+
+    // It is unsafe to drop a function with discardable linkage from a COMDAT
+    // without also dropping the other members of the COMDAT.
+    // The inliner doesn't visit non-function entities in COMDAT groups, so
+    // dropping a member is unsafe *unless* the linkage is local.
+ if (!F->hasLocalLinkage()) {
+ if (const Comdat *C = F->getComdat()) {
+ --ComdatEntriesAlive[C];
+ DeadFunctionsInComdats.push_back(CGN);
+ continue;
+ }
+ }
+
+ RemoveCGN(CGN);
+ }
+ if (!DeadFunctionsInComdats.empty()) {
+ // Count up all the entities in COMDAT groups
+ auto ComdatGroupReferenced = [&](const Comdat *C) {
+ auto I = ComdatEntriesAlive.find(C);
+ if (I != ComdatEntriesAlive.end())
+ ++(I->getSecond());
+ };
+ for (const Function &F : CG.getModule())
+ if (const Comdat *C = F.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalVariable &GV : CG.getModule().globals())
+ if (const Comdat *C = GV.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalAlias &GA : CG.getModule().aliases())
+ if (const Comdat *C = GA.getComdat())
+ ComdatGroupReferenced(C);
+ for (CallGraphNode *CGN : DeadFunctionsInComdats) {
+ Function *F = CGN->getFunction();
+ const Comdat *C = F->getComdat();
+ int NumAlive = ComdatEntriesAlive[C];
+ // We can remove functions in a COMDAT group if the entire group is dead.
+ assert(NumAlive >= 0);
+ if (NumAlive > 0)
+ continue;
+
+ RemoveCGN(CGN);
+ }
+ }
+
+ if (FunctionsToRemove.empty())
+ return false;
+
+ // Now that we know which functions to delete, do so. We didn't want to do
+ // this inline, because that would invalidate our CallGraph::iterator
+ // objects. :(
+ //
+  // Note that it doesn't matter that we are iterating over a non-stable order
+  // here to do this; the functions can be deleted in any order.
+ array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+ FunctionsToRemove.end()),
+ FunctionsToRemove.end());
+ for (CallGraphNode *CGN : FunctionsToRemove) {
+ delete CG.removeFunctionFromModule(CGN);
+ ++NumDeleted;
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 0000000..21bb5d0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,269 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions and variables in the input module.
+// If the function or variable is not in the list of external names given to
+// the pass it is marked as internal.
+//
+// This transformation would not be legal in a regular compilation, but it gets
+// extra information from the linker about what is safe.
+//
+// For example, internalizing a function with external linkage is safe only
+// if we are told that it is used exclusively from within this module.
+//
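+// For illustration, a typical invocation preserves only the entry points:
+//
+//   opt -internalize -internalize-public-api-list=main module.bc
+//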
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+#define DEBUG_TYPE "internalize"
+
+STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// internal.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+
+namespace {
+ class InternalizePass : public ModulePass {
+ std::set<std::string> ExternalNames;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit InternalizePass();
+ explicit InternalizePass(ArrayRef<const char *> ExportList);
+ void LoadFile(const char *Filename);
+ bool maybeInternalize(GlobalValue &GV,
+ const std::set<const Comdat *> &ExternalComdats);
+ void checkComdatVisibility(GlobalValue &GV,
+ std::set<const Comdat *> &ExternalComdats);
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraphWrapperPass>();
+ }
+ };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false)
+
+InternalizePass::InternalizePass() : ModulePass(ID) {
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+ if (!APIFile.empty()) // If a filename is specified, use it.
+ LoadFile(APIFile.c_str());
+ ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
+ : ModulePass(ID) {
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+  for (const char *Name : ExportList)
+    ExternalNames.insert(Name);
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
+}
+
+static bool isExternallyVisible(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
+  // The global must be defined in this module.
+ if (GV.isDeclaration())
+ return true;
+
+ // Available externally is really just a "declaration with a body".
+ if (GV.hasAvailableExternallyLinkage())
+ return true;
+
+ // Assume that dllexported symbols are referenced elsewhere
+ if (GV.hasDLLExportStorageClass())
+ return true;
+
+ // Marked to keep external?
+ if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+ return true;
+
+ return false;
+}
+
+// Internalize GV if it is possible to do so, i.e. it is not externally visible
+// and is not a member of an externally visible comdat.
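+// For example, a linkonce_odr function in a comdat that is not externally
+// visible has its comdat stripped and is given internal linkage, while any
+// member of an externally visible comdat is left untouched.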
+bool InternalizePass::maybeInternalize(
+ GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ if (Comdat *C = GV.getComdat()) {
+ if (ExternalComdats.count(C))
+ return false;
+
+ // If a comdat is not externally visible we can drop it.
+ if (auto GO = dyn_cast<GlobalObject>(&GV))
+ GO->setComdat(nullptr);
+
+ if (GV.hasLocalLinkage())
+ return false;
+ } else {
+ if (GV.hasLocalLinkage())
+ return false;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ return false;
+ }
+
+ GV.setVisibility(GlobalValue::DefaultVisibility);
+ GV.setLinkage(GlobalValue::InternalLinkage);
+ return true;
+}
+
+// If GV is part of a comdat and is externally visible, keep track of its
+// comdat so that we don't internalize any of its members.
+void InternalizePass::checkComdatVisibility(
+ GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+ Comdat *C = GV.getComdat();
+ if (!C)
+ return;
+
+ if (isExternallyVisible(GV, ExternalNames))
+ ExternalComdats.insert(C);
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+ CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
+ CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
+
+ SmallPtrSet<GlobalValue *, 8> Used;
+ collectUsedGlobalVariables(M, Used, false);
+
+  // Collect comdat visibility information for the module.
+ std::set<const Comdat *> ExternalComdats;
+ if (!M.getComdatSymbolTable().empty()) {
+ for (Function &F : M)
+ checkComdatVisibility(F, ExternalComdats);
+ for (GlobalVariable &GV : M.globals())
+ checkComdatVisibility(GV, ExternalComdats);
+ for (GlobalAlias &GA : M.aliases())
+ checkComdatVisibility(GA, ExternalComdats);
+ }
+
+ // We must assume that globals in llvm.used have a reference that not even
+ // the linker can see, so we don't internalize them.
+ // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
+ // linker can drop those symbols. If this pass is running as part of LTO,
+ // one might think that it could just drop llvm.compiler.used. The problem
+ // is that even in LTO llvm doesn't see every reference. For example,
+ // we don't see references from function local inline assembly. To be
+ // conservative, we internalize symbols in llvm.compiler.used, but we
+ // keep llvm.compiler.used so that the symbol is not deleted by llvm.
+ for (GlobalValue *V : Used) {
+ ExternalNames.insert(V->getName());
+ }
+
+  // Mark all functions not in the API as internal.
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ExternalComdats))
+ continue;
+
+ if (ExternalNode)
+ // Remove a callgraph edge from the external node to this function.
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
+
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
+ }
+
+ // Never internalize the llvm.used symbol. It is used to implement
+ // attribute((used)).
+ // FIXME: Shouldn't this just filter on llvm.metadata section??
+ ExternalNames.insert("llvm.used");
+ ExternalNames.insert("llvm.compiler.used");
+
+ // Never internalize anchors used by the machine module info, else the info
+ // won't find them. (see MachineModuleInfo.)
+ ExternalNames.insert("llvm.global_ctors");
+ ExternalNames.insert("llvm.global_dtors");
+ ExternalNames.insert("llvm.global.annotations");
+
+ // Never internalize symbols code-gen inserts.
+ // FIXME: We should probably add this (and the __stack_chk_guard) via some
+ // type of call-back in CodeGen.
+ ExternalNames.insert("__stack_chk_fail");
+ ExternalNames.insert("__stack_chk_guard");
+
+  // Mark all global variables with initializers that are not in the API as
+  // internal as well.
+  for (GlobalVariable &GV : M.globals()) {
+    if (!maybeInternalize(GV, ExternalComdats))
+      continue;
+
+    ++NumGlobals;
+    DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
+  }
+
+  // Mark all aliases that are not in the API as internal as well.
+  for (GlobalAlias &GA : M.aliases()) {
+    if (!maybeInternalize(GA, ExternalComdats))
+      continue;
+
+    ++NumAliases;
+    DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
+  }
+
+ // We do not keep track of whether this pass changed the module because
+ // it adds unnecessary complexity:
+ // 1) This pass will generally be near the start of the pass pipeline, so
+ // there will be no analyses to invalidate.
+ // 2) This pass will most likely end up changing the module and it isn't worth
+ // worrying about optimizing the case where the module is unchanged.
+ return true;
+}
+
+ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
+
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+ return new InternalizePass(ExportList);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 0000000..8e4ad64
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,310 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the CodeExtractor scalar transformation to extract
+// each top-level loop into its own new function. If a loop is the ONLY loop
+// in a given function and that function is a minimal wrapper around it, the
+// loop is not touched. This is a pass most useful for debugging via bugpoint.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-extract"
+
+STATISTIC(NumExtracted, "Number of loops extracted");
+
+namespace {
+ struct LoopExtractor : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned NumLoops;
+
+ explicit LoopExtractor(unsigned numLoops = ~0)
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(BreakCriticalEdgesID);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+ };
+}
+
+char LoopExtractor::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+
+namespace {
+ /// SingleLoopExtractor - For bugpoint.
+ struct SingleLoopExtractor : public LoopExtractor {
+ static char ID; // Pass identification, replacement for typeid
+ SingleLoopExtractor() : LoopExtractor(1) {}
+ };
+} // End anonymous namespace
+
+char SingleLoopExtractor::ID = 0;
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false)
+
+// createLoopExtractorPass - This pass extracts each top-level natural loop
+// from the program into its own function if it can.
+//
+Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
+
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
+ if (skipOptnoneFunction(L))
+ return false;
+
+ // Only visit top-level loops.
+ if (L->getParentLoop())
+ return false;
+
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ bool Changed = false;
+
+ // If there is more than one top-level loop in this function, extract all of
+ // the loops. Otherwise there is exactly one top-level loop; in this case if
+ // this function is more than a minimal wrapper around the loop, extract
+ // the loop.
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ TerminatorInst *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != L->getHeader()) {
+ ShouldExtractLoop = true;
+ } else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+    for (BasicBlock *ExitBB : ExitBlocks)
+      if (!isa<ReturnInst>(ExitBB->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+
+ if (ShouldExtractLoop) {
+    // We must omit EH pads: an EH pad must accompany its invoke instruction,
+    // so extracting one would create a loop in the extracted function, and
+    // an infinite cycle occurs when the extractor then tries to extract that
+    // loop as well.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+    for (BasicBlock *ExitBB : ExitBlocks)
+      if (ExitBB->isEHPad()) {
+ ShouldExtractLoop = false;
+ break;
+ }
+ }
+
+ if (ShouldExtractLoop) {
+ if (NumLoops == 0) return Changed;
+ --NumLoops;
+ CodeExtractor Extractor(DT, *L);
+ if (Extractor.extractCodeRegion() != nullptr) {
+ Changed = true;
+ // After extraction, the loop is replaced by a function call, so
+ // we shouldn't try to run any more loop passes on it.
+ LI.updateUnloop(L);
+ }
+ ++NumExtracted;
+ }
+
+ return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+ return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+
+namespace {
+ /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+ /// from the module into their own functions except for those specified by the
+ /// BlocksToNotExtract list.
+ class BlockExtractorPass : public ModulePass {
+ void LoadFile(const char *Filename);
+ void SplitLandingPadPreds(Function *F);
+
+ std::vector<BasicBlock*> BlocksToNotExtract;
+ std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ BlockExtractorPass() : ModulePass(ID) {
+ if (!BlockFile.empty())
+ LoadFile(BlockFile.c_str());
+ }
+
+ bool runOnModule(Module &M) override;
+ };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false)
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass() {
+ return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+ // Load the BlockFile...
+ std::ifstream In(Filename);
+ if (!In.good()) {
+ errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+ << "'!\n";
+ return;
+ }
+ while (In) {
+ std::string FunctionName, BlockName;
+ In >> FunctionName;
+ In >> BlockName;
+ if (!BlockName.empty())
+ BlocksToNotExtractByName.push_back(
+ std::make_pair(FunctionName, BlockName));
+ }
+}
+
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the
+/// invoke instruction. The critical edge breaker will refuse to break
+/// critical edges to a landing pad, so we split them here. After this method
+/// runs, all landing pads should have exactly one predecessor.
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ InvokeInst *II = dyn_cast<InvokeInst>(I);
+ if (!II) continue;
+ BasicBlock *Parent = II->getParent();
+ BasicBlock *LPad = II->getUnwindDest();
+
+ // Look through the landing pad's predecessors. If one of them ends in an
+ // 'invoke', then we want to split the landing pad.
+ bool Split = false;
+    for (BasicBlock *BB : predecessors(LPad)) {
+ if (BB->isLandingPad() && BB != Parent &&
+ isa<InvokeInst>(Parent->getTerminator())) {
+ Split = true;
+ break;
+ }
+ }
+
+ if (!Split) continue;
+
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
+ }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+ std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+ for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+ BasicBlock *BB = BlocksToNotExtract[i];
+ Function *F = BB->getParent();
+
+    // Find the corresponding function in this module.
+ Function *MF = M.getFunction(F->getName());
+ assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+ // Figure out which index the basic block is in its function.
+ Function::iterator BBI = MF->begin();
+ std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+ TranslatedBlocksToNotExtract.insert(&*BBI);
+ }
+
+ while (!BlocksToNotExtractByName.empty()) {
+ // There's no way to find BBs by name without looking at every BB inside
+    // every Function. Fortunately, this list is empty except when used by
+    // bugpoint, in which case correctness is more important than performance.
+
+ std::string &FuncName = BlocksToNotExtractByName.back().first;
+ std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+ Function &F = *FI;
+ if (F.getName() != FuncName) continue;
+
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock &BB = *BI;
+ if (BB.getName() != BlockName) continue;
+
+ TranslatedBlocksToNotExtract.insert(&*BI);
+ }
+ }
+
+ BlocksToNotExtractByName.pop_back();
+ }
+
+ // Now that we know which blocks to not extract, figure out which ones we WANT
+ // to extract.
+ std::vector<BasicBlock*> BlocksToExtract;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ SplitLandingPadPreds(&*F);
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ if (!TranslatedBlocksToNotExtract.count(&*BB))
+ BlocksToExtract.push_back(&*BB);
+ }
+
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+ SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+ BlocksToExtractVec.push_back(BlocksToExtract[i]);
+ if (const InvokeInst *II =
+ dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+ BlocksToExtractVec.push_back(II->getUnwindDest());
+ CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+ }
+
+ return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..7b51574
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,1055 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(ByteArraySizeBits, "Byte array size in bits");
+STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
+STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+static cl::opt<bool> AvoidReuse(
+ "lowerbitsets-avoid-reuse",
+ cl::desc("Try to avoid reuse of byte array addresses using aliases"),
+ cl::Hidden, cl::init(true));
+
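+/// Return true if the combined-global offset Offset refers to a member of
+/// this bit set. For example, with ByteOffset == 0 and AlignLog2 == 3,
+/// Offset 24 maps to bit 3, and any offset that is not a multiple of 8 is
+/// rejected outright.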
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+ if (Offset < ByteOffset)
+ return false;
+
+ if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+ return false;
+
+ uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+ if (BitOffset >= BitSize)
+ return false;
+
+ return Bits.count(BitOffset);
+}
+
+bool BitSetInfo::containsValue(
+ const DataLayout &DL,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
+ uint64_t COffset) const {
+ if (auto GV = dyn_cast<GlobalObject>(V)) {
+ auto I = GlobalLayout.find(GV);
+ if (I == GlobalLayout.end())
+ return false;
+ return containsGlobalOffset(I->second + COffset);
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+ COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+ containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
+void BitSetInfo::print(raw_ostream &OS) const {
+ OS << "offset " << ByteOffset << " size " << BitSize << " align "
+ << (1 << AlignLog2);
+
+ if (isAllOnes()) {
+ OS << " all-ones\n";
+ return;
+ }
+
+ OS << " { ";
+ for (uint64_t B : Bits)
+ OS << B << ' ';
+ OS << "}\n";
+}
+
+BitSetInfo BitSetBuilder::build() {
+ if (Min > Max)
+ Min = 0;
+
+ // Normalize each offset against the minimum observed offset, and compute
+ // the bitwise OR of each of the offsets. The number of trailing zeros
+ // in the mask gives us the log2 of the alignment of all offsets, which
+ // allows us to compress the bitset by only storing one bit per aligned
+ // address.
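+  //
+  // For example, offsets {8, 16, 24} normalize against Min == 8 to
+  // {0, 8, 16}; their bitwise OR is 24 (0b11000), so AlignLog2 == 3,
+  // BitSize == 3 and the stored bits are {0, 1, 2}.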
+ uint64_t Mask = 0;
+ for (uint64_t &Offset : Offsets) {
+ Offset -= Min;
+ Mask |= Offset;
+ }
+
+ BitSetInfo BSI;
+ BSI.ByteOffset = Min;
+
+ BSI.AlignLog2 = 0;
+ if (Mask != 0)
+ BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+ // Build the compressed bitset while normalizing the offsets against the
+ // computed alignment.
+ BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+ for (uint64_t Offset : Offsets) {
+ Offset >>= BSI.AlignLog2;
+ BSI.Bits.insert(Offset);
+ }
+
+ return BSI;
+}
+
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+ // Create a new fragment to hold the layout for F.
+ Fragments.emplace_back();
+ std::vector<uint64_t> &Fragment = Fragments.back();
+ uint64_t FragmentIndex = Fragments.size() - 1;
+
+ for (auto ObjIndex : F) {
+ uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+ if (OldFragmentIndex == 0) {
+ // We haven't seen this object index before, so just add it to the current
+ // fragment.
+ Fragment.push_back(ObjIndex);
+ } else {
+ // This index belongs to an existing fragment. Copy the elements of the
+ // old fragment into this one and clear the old fragment. We don't update
+      // the fragment map just yet; this ensures that any further references to
+ // indices from the old fragment in this fragment do not insert any more
+ // indices.
+ std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+ Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+ OldFragment.clear();
+ }
+ }
+
+ // Update the fragment map to point our object indices to this fragment.
+ for (uint64_t ObjIndex : Fragment)
+ FragmentMap[ObjIndex] = FragmentIndex;
+}
+
+void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
+ uint64_t BitSize, uint64_t &AllocByteOffset,
+ uint8_t &AllocMask) {
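+  // The byte array packs up to eight bit sets into the same bytes, one per
+  // bit position within a byte. We place this set in whichever bit plane
+  // currently ends soonest, growing the array only as far as that plane
+  // needs.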
+ // Find the smallest current allocation.
+ unsigned Bit = 0;
+ for (unsigned I = 1; I != BitsPerByte; ++I)
+ if (BitAllocs[I] < BitAllocs[Bit])
+ Bit = I;
+
+ AllocByteOffset = BitAllocs[Bit];
+
+ // Add our size to it.
+ unsigned ReqSize = AllocByteOffset + BitSize;
+ BitAllocs[Bit] = ReqSize;
+ if (Bytes.size() < ReqSize)
+ Bytes.resize(ReqSize);
+
+ // Set our bits.
+ AllocMask = 1 << Bit;
+ for (uint64_t B : Bits)
+ Bytes[AllocByteOffset + B] |= AllocMask;
+}
+
+namespace {
+
+struct ByteArrayInfo {
+ std::set<uint64_t> Bits;
+ uint64_t BitSize;
+ GlobalVariable *ByteArray;
+ Constant *Mask;
+};
+
+struct LowerBitSets : public ModulePass {
+ static char ID;
+ LowerBitSets() : ModulePass(ID) {
+ initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+
+ bool LinkerSubsectionsViaSymbols;
+ Triple::ArchType Arch;
+ Triple::ObjectFormatType ObjectFormat;
+ IntegerType *Int1Ty;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ Type *Int32PtrTy;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
+
+ // The llvm.bitsets named metadata.
+ NamedMDNode *BitSetNM;
+
+ // Mapping from bitset identifiers to the call sites that test them.
+ DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
+
+ std::vector<ByteArrayInfo> ByteArrayInfos;
+
+ BitSetInfo
+ buildBitSet(Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+ void allocateByteArrays();
+ Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *BitOffset);
+ void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
+ Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ Value *
+ lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Constant *CombinedGlobal,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalVariable *> Globals);
+ unsigned getJumpTableEntrySize();
+ Type *getJumpTableEntryType();
+ Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance);
+ void verifyBitSetMDNode(MDNode *Op);
+ void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions);
+ void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+ ArrayRef<GlobalObject *> Globals);
+ bool buildBitSets();
+ bool eraseBitSetMetadata();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &Mod) {
+ M = &Mod;
+ const DataLayout &DL = Mod.getDataLayout();
+
+ Triple TargetTriple(M->getTargetTriple());
+ LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+ Arch = TargetTriple.getArch();
+ ObjectFormat = TargetTriple.getObjectFormat();
+
+ Int1Ty = Type::getInt1Ty(M->getContext());
+ Int8Ty = Type::getInt8Ty(M->getContext());
+ Int32Ty = Type::getInt32Ty(M->getContext());
+ Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ Int64Ty = Type::getInt64Ty(M->getContext());
+ IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+
+ BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ BitSetTestCallSites.clear();
+
+ return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+ Metadata *BitSet,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ BitSetBuilder BSB;
+
+ // Compute the byte offset of each element of this bitset.
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+ Constant *OpConst =
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+ if (auto GA = dyn_cast<GlobalAlias>(OpConst))
+ OpConst = GA->getAliasee();
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
+ if (!OpGlobal)
+ continue;
+ uint64_t Offset =
+ cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+ ->getValue())->getZExtValue();
+
+ Offset += GlobalLayout.find(OpGlobal)->second;
+
+ BSB.addOffset(Offset);
+ }
+ }
+
+ return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern matches the bt instruction on x86.
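+/// For a 32-bit Bits value, for example, this computes
+///   (Bits & (1 << (BitOffset & 31))) != 0.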
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+ Value *BitOffset) {
+ auto BitsType = cast<IntegerType>(Bits->getType());
+ unsigned BitWidth = BitsType->getBitWidth();
+
+ BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+ Value *BitIndex =
+ B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+ Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+ Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+ return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
+
+ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
+ // Create globals to stand in for byte arrays and masks. These never actually
+  // get initialized; we RAUW and erase them later in allocateByteArrays() once
+ // we know the offset and mask to use.
+ auto ByteArrayGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ auto MaskGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+
+ ByteArrayInfos.emplace_back();
+ ByteArrayInfo *BAI = &ByteArrayInfos.back();
+
+ BAI->Bits = BSI.Bits;
+ BAI->BitSize = BSI.BitSize;
+ BAI->ByteArray = ByteArrayGlobal;
+ BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ return BAI;
+}
+
+void LowerBitSets::allocateByteArrays() {
+ std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
+
+ std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
+
+ ByteArrayBuilder BAB;
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ uint8_t Mask;
+ BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
+
+ BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
+ cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ }
+
+ Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
+ auto ByteArray =
+ new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, ByteArrayConst);
+
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
+ ByteArrayConst->getType(), ByteArray, Idxs);
+
+    // Create an alias instead of RAUW'ing the gep directly. On x86 this
+    // ensures that the pc-relative displacement is folded into the lea
+    // rather than requiring a separate displacement on the test instruction.
+ if (LinkerSubsectionsViaSymbols) {
+ BAI->ByteArray->replaceAllUsesWith(GEP);
+ } else {
+ GlobalAlias *Alias = GlobalAlias::create(
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
+ BAI->ByteArray->replaceAllUsesWith(Alias);
+ }
+ BAI->ByteArray->eraseFromParent();
+ }
+
+ ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
+ BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
+ BAB.BitAllocs[6] + BAB.BitAllocs[7];
+ ByteArraySizeBytes = BAB.Bytes.size();
+}
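+
+// A sketch of the packing performed above, assuming ByteArrayBuilder behaves
+// as its use here suggests: each byte of BAB.Bytes can carry bits from up to
+// eight different bit sets, one per bit position, with BAB.BitAllocs[0..7]
+// counting the bits handed out at each position. A bit set placed at bit
+// position 5 therefore receives Mask = 0x20, and a membership query tests
+// (ByteArray[BitOffset] & 0x20) != 0, as emitted in createBitSetTest below.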
+
+/// Build a test that bit BitOffset is set in the bit set BSI, creating a
+/// byte array (via BAI) if one is needed.
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI, Value *BitOffset) {
+ if (BSI.BitSize <= 64) {
+ // If the bit set is sufficiently small, we can avoid a load by bit testing
+ // a constant.
+ IntegerType *BitsTy;
+ if (BSI.BitSize <= 32)
+ BitsTy = Int32Ty;
+ else
+ BitsTy = Int64Ty;
+
+ uint64_t Bits = 0;
+ for (auto Bit : BSI.Bits)
+ Bits |= uint64_t(1) << Bit;
+ Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+ return createMaskedBitTest(B, BitsConst, BitOffset);
+ } else {
+ if (!BAI) {
+ ++NumByteArraysCreated;
+ BAI = createByteArray(BSI);
+ }
+
+ Constant *ByteArray = BAI->ByteArray;
+ Type *Ty = BAI->ByteArray->getValueType();
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ // Each use of the byte array uses a different alias. This makes the
+ // backend less likely to reuse previously computed byte array addresses,
+ // improving the security of the CFI mechanism based on this pass.
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
+ GlobalValue::PrivateLinkage, "bits_use",
+ ByteArray, M);
+ }
+
+ Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *Byte = B.CreateLoad(ByteAddr);
+
+ Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
+ }
+}
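+
+// For example, a bit set with BSI.Bits = {0, 2, 5} and BSI.BitSize <= 32
+// takes the small-set path above: it becomes the constant
+// 0b100101 = 0x25, and the membership test reduces to
+// createMaskedBitTest(B, 0x25, BitOffset) with no memory access at all.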
+
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
+ CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Constant *CombinedGlobalIntAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ Value *Ptr = CI->getArgOperand(0);
+ const DataLayout &DL = M->getDataLayout();
+
+ if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ return ConstantInt::getTrue(M->getContext());
+
+ Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+ CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+ BasicBlock *InitialBB = CI->getParent();
+
+ IRBuilder<> B(CI);
+
+ Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+ if (BSI.isSingleOffset())
+ return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *BitOffset;
+ if (BSI.AlignLog2 == 0) {
+ BitOffset = PtrOffset;
+ } else {
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset,
+ ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
+ BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+ }
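+
+ // Worked example of the rotate: with 64-bit pointers and AlignLog2 == 3,
+ // PtrOffset = 24 yields (24 >> 3) | (24 << 61) = 3 | 0 = 3, a valid bit
+ // offset, while a misaligned PtrOffset = 25 yields (25 >> 3) | (25 << 61)
+ // = 3 | (1 << 61), far larger than any BitSize, so the single unsigned
+ // comparison below rejects it along with out-of-range offsets.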
+
+ Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+ // If the bit set is all ones, testing against it is unnecessary.
+ if (BSI.isAllOnes())
+ return OffsetInRange;
+
+ TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+ IRBuilder<> ThenB(Term);
+
+ // Now that we know that the offset is in range and aligned, load the
+ // appropriate bit from the bitset.
+ Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+
+ // The value we want is 0 if we came directly from the initial block
+ // (having failed the range or alignment checks), or the loaded bit if
+ // we came from the block in which we loaded it.
+ B.SetInsertPoint(CI);
+ PHINode *P = B.CreatePHI(Int1Ty, 2);
+ P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+ P->addIncoming(Bit, ThenB.GetInsertBlock());
+ return P;
+}
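+
+// The general-case lowering above produces IR of roughly this shape (a
+// sketch; value names are illustrative):
+//
+//   %ptrint  = ptrtoint i8* %ptr to i64
+//   %diff    = sub i64 %ptrint, %globalbase
+//   %rot     = or i64 (lshr %diff, AlignLog2), (shl %diff, 64 - AlignLog2)
+//   %inrange = icmp ult i64 %rot, BitSize
+//   br i1 %inrange, label %then, label %join
+// then:
+//   %bit = <constant bit test or byte array load>
+//   br label %join
+// join:
+//   %res = phi i1 [ false, %entry ], [ %bit, %then ]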
+
+/// Given a disjoint set of bitsets and globals, layout the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobalVariables(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
+ // Build a new global with the combined contents of the referenced globals.
+ // This global is a struct whose even-indexed elements contain the original
+ // contents of the referenced globals and whose odd-indexed elements contain
+ // any padding required to align the next element to the next power of 2.
+ std::vector<Constant *> GlobalInits;
+ const DataLayout &DL = M->getDataLayout();
+ for (GlobalVariable *G : Globals) {
+ GlobalInits.push_back(G->getInitializer());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
+
+ // Compute the amount of padding required.
+ uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+ // A cap of 128 was found experimentally to give a good data/instruction
+ // overhead tradeoff.
+ if (Padding > 128)
+ Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
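+
+ // For example, a global of size 24 is padded to the next power of two:
+ // NextPowerOf2(23) = 32, so Padding = 8. A global of size 300 would get
+ // 512 - 300 = 212 bytes of power-of-two padding, which exceeds the cap,
+ // so it is reduced to RoundUpToAlignment(300, 128) - 300 = 84 bytes.
+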
+ if (!GlobalInits.empty())
+ GlobalInits.pop_back();
+ Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+ auto *CombinedGlobal =
+ new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, NewInit);
+
+ StructType *NewTy = cast<StructType>(NewInit->getType());
+ const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
+
+ // Compute the offsets of the original globals within the new global.
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ // Multiply by 2 to account for padding elements.
+ GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+ lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
+
+ // Build aliases pointing to offsets into the combined global for each
+ // global from which we built the combined global, and replace references
+ // to the original globals with references to the aliases.
+ for (unsigned I = 0; I != Globals.size(); ++I) {
+ // Multiply by 2 to account for padding elements.
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, I * 2)};
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
+ NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
+ if (LinkerSubsectionsViaSymbols) {
+ Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ assert(Globals[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
+ Globals[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Globals[I]->getVisibility());
+ GAlias->takeName(Globals[I]);
+ Globals[I]->replaceAllUsesWith(GAlias);
+ }
+ Globals[I]->eraseFromParent();
+ }
+}
+
+void LowerBitSets::lowerBitSetCalls(
+ ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ Constant *CombinedGlobalIntAddr =
+ ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
+
+ // For each bitset in this disjoint set...
+ for (Metadata *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+ DEBUG({
+ if (auto BSS = dyn_cast<MDString>(BS))
+ dbgs() << BSS->getString() << ": ";
+ else
+ dbgs() << "<unnamed>: ";
+ BSI.print(dbgs());
+ });
+
+ ByteArrayInfo *BAI = nullptr;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered =
+ lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+}
+
+void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+ if (!Op->getOperand(1))
+ return;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue());
+ if (!OpGlobal)
+ return;
+
+ if (OpGlobal->isThreadLocal())
+ report_fatal_error("Bit set element may not be thread-local");
+ if (OpGlobal->hasSection())
+ report_fatal_error("Bit set element may not have an explicit section");
+
+ if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker())
+ report_fatal_error("Bit set global var element must be a definition");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error("Bit set element offset must be an integer constant");
+}
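+
+// For illustration, a well-formed entry in the llvm.bitsets named metadata,
+// as checked above, looks roughly like this (names are illustrative):
+//
+//   !llvm.bitsets = !{!0, !1}
+//   !0 = !{!"bitset1", i32* @g, i64 0}
+//   !1 = !{!"bitset1", void ()* @f, i64 0}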
+
+static const unsigned kX86JumpTableEntrySize = 8;
+
+unsigned LowerBitSets::getJumpTableEntrySize() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return kX86JumpTableEntrySize;
+}
+
+// Create a constant representing a jump table entry for the target. This
+// consists of an instruction sequence containing a relative branch to Dest. The
+// constant will be laid out at address Src+(Len*Distance) where Len is the
+// target-specific jump table entry size.
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
+ unsigned Distance) {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ const unsigned kJmpPCRel32Code = 0xe9;
+ const unsigned kInt3Code = 0xcc;
+
+ ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code);
+
+ // Build a constant representing the displacement between the constant's
+ // address and Dest. This will resolve to a PC32 relocation referring to Dest.
+ Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
+ Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy);
+ Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt);
+ ConstantInt *DispOffset =
+ ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
+ Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset);
+ OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty);
+
+ ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code);
+
+ Constant *Fields[] = {
+ Jmp, OffsetedDisp, Int3, Int3, Int3,
+ };
+ return ConstantStruct::getAnon(Fields, /*Packed=*/true);
+}
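+
+// Byte for byte, the entry built above encodes:
+//
+//   e9 <disp32>   ; jmp rel32 to Dest (5 bytes)
+//   cc cc cc      ; int3 padding up to the 8-byte entry size
+//
+// where disp32 = Dest - (Src + Distance*8 + 5), since a rel32 target is
+// encoded relative to the end of the 5-byte jmp of entry number Distance
+// within the table at Src.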
+
+Type *LowerBitSets::getJumpTableEntryType() {
+ if (Arch != Triple::x86 && Arch != Triple::x86_64)
+ report_fatal_error("Unsupported architecture for jump tables");
+
+ return StructType::get(M->getContext(),
+ {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty},
+ /*Packed=*/true);
+}
+
+/// Given a disjoint set of bitsets and functions, build a jump table for the
+/// functions, build the bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ ArrayRef<Function *> Functions) {
+ // Unlike the global bitset builder, the function bitset builder cannot
+ // re-arrange functions in a particular order and base its calculations on the
+ // layout of the functions' entry points, as we have no idea how large a
+ // particular function will end up being (the size could even depend on what
+ // this pass does!) Instead, we build a jump table, which is a block of code
+ // consisting of one branch instruction for each of the functions in the bit
+ // set that branches to the target function, and redirect any taken function
+ // addresses to the corresponding jump table entry. In the object file's
+ // symbol table, the symbols for the target functions also refer to the jump
+ // table entries, so that addresses taken outside the module will pass any
+ // verification done inside the module.
+ //
+ // In more concrete terms, suppose we have three functions f, g, h which are
+ // members of a single bitset, and a function foo that returns their
+ // addresses:
+ //
+ // f:
+ // mov 0, %eax
+ // ret
+ //
+ // g:
+ // mov 1, %eax
+ // ret
+ //
+ // h:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // To create a jump table for these functions, we instruct the LLVM code
+ // generator to output a jump table in the .text section. This is done by
+ // representing the instructions in the jump table as an LLVM constant and
+ // placing them in a global variable in the .text section. The end result will
+ // (conceptually) look like this:
+ //
+ // f:
+ // jmp .Ltmp0 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // g:
+ // jmp .Ltmp1 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // h:
+ // jmp .Ltmp2 ; 5 bytes
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ // int3 ; 1 byte
+ //
+ // .Ltmp0:
+ // mov 0, %eax
+ // ret
+ //
+ // .Ltmp1:
+ // mov 1, %eax
+ // ret
+ //
+ // .Ltmp2:
+ // mov 2, %eax
+ // ret
+ //
+ // foo:
+ // mov f, %eax
+ // mov g, %edx
+ // mov h, %ecx
+ // ret
+ //
+ // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
+ // normal case the check can be carried out using the same kind of simple
+ // arithmetic that we normally use for globals.
+
+ assert(!Functions.empty());
+
+ // Build a simple layout based on the regular layout of jump tables.
+ DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ unsigned EntrySize = getJumpTableEntrySize();
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ GlobalLayout[Functions[I]] = I * EntrySize;
+
+ // Create a constant to hold the jump table.
+ ArrayType *JumpTableType =
+ ArrayType::get(getJumpTableEntryType(), Functions.size());
+ auto JumpTable = new GlobalVariable(*M, JumpTableType,
+ /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, nullptr);
+ JumpTable->setSection(ObjectFormat == Triple::MachO
+ ? "__TEXT,__text,regular,pure_instructions"
+ : ".text");
+ lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);
+
+ // Build aliases pointing to offsets into the jump table, and replace
+ // references to the original functions with references to the aliases.
+ for (unsigned I = 0; I != Functions.size(); ++I) {
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
+ ConstantExpr::getGetElementPtr(
+ JumpTableType, JumpTable,
+ ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, I)}),
+ Functions[I]->getType());
+ if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
+ Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ assert(Functions[I]->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0,
+ Functions[I]->getLinkage(), "",
+ CombinedGlobalElemPtr, M);
+ GAlias->setVisibility(Functions[I]->getVisibility());
+ GAlias->takeName(Functions[I]);
+ Functions[I]->replaceAllUsesWith(GAlias);
+ }
+ if (!Functions[I]->isDeclarationForLinker())
+ Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
+ }
+
+ // Build and set the jump table's initializer.
+ std::vector<Constant *> JumpTableEntries;
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ JumpTableEntries.push_back(
+ createJumpTableEntry(JumpTable, Functions[I], I));
+ JumpTable->setInitializer(
+ ConstantArray::get(JumpTableType, JumpTableEntries));
+}
+
+void LowerBitSets::buildBitSetsFromDisjointSet(
+ ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
+ llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
+ for (unsigned I = 0; I != BitSets.size(); ++I)
+ BitSetIndices[BitSets[I]] = I;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ GlobalIndices[Globals[I]] = I;
+
+ // For each bitset, build a set of indices that refer to globals referenced by
+ // the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(Op->getOperand(0));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ if (!OpGlobal)
+ continue;
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
+ // close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build the bitsets from this disjoint set.
+ if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+ // Build a vector of global variables with the computed layout.
+ std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+ auto OGI = OrderedGVs.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+ if (!GV)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OGI++ = GV;
+ }
+ }
+
+ buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+ } else {
+ // Build a vector of functions with the computed layout.
+ std::vector<Function *> OrderedFns(Globals.size());
+ auto OFI = OrderedFns.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ auto Fn = dyn_cast<Function>(Globals[Offset]);
+ if (!Fn)
+ report_fatal_error(
+ "Bit set may not contain both global variables and functions");
+ *OFI++ = Fn;
+ }
+ }
+
+ buildBitSetsFromFunctions(BitSets, OrderedFns);
+ }
+}
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets() {
+ Function *BitSetTestFunc =
+ M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+ if (!BitSetTestFunc)
+ return false;
+
+ // Equivalence class set containing bitsets and the globals they reference.
+ // This is used to partition the set of bitsets in the module into disjoint
+ // sets.
+ typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
+ GlobalClassesTy;
+ GlobalClassesTy GlobalClasses;
+
+ // Verify the bitset metadata and build a mapping from bitset identifiers to
+ // their last observed index in BitSetNM. This will be used later to
+ // deterministically order the list of bitset identifiers.
+ llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
+ if (BitSetNM) {
+ for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
+ MDNode *Op = BitSetNM->getOperand(I);
+ verifyBitSetMDNode(Op);
+ BitSetIdIndices[Op->getOperand(0)] = I;
+ }
+ }
+
+ for (const Use &U : BitSetTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+
+ auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!BitSetMDVal)
+ report_fatal_error(
+ "Second argument of llvm.bitset.test must be metadata");
+ auto BitSet = BitSetMDVal->getMetadata();
+
+ // Add the call site to the list of call sites for this bit set. We also use
+ // BitSetTestCallSites to keep track of whether we have seen this bit set
+ // before. If we have, we don't need to re-add the referenced globals to the
+ // equivalence class.
+ std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
+ bool> Ins =
+ BitSetTestCallSites.insert(
+ std::make_pair(BitSet, std::vector<CallInst *>()));
+ Ins.first->second.push_back(CI);
+ if (!Ins.second)
+ continue;
+
+ // Add the bitset to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ if (!BitSetNM)
+ continue;
+
+ // Add the referenced globals to the bitset's equivalence class.
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+
+ auto OpGlobal = dyn_cast<GlobalObject>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ if (!OpGlobal)
+ continue;
+
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+ }
+ }
+
+ if (GlobalClasses.empty())
+ return false;
+
+ // Build a list of disjoint sets ordered by their maximum BitSetNM index
+ // for determinism.
+ std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
+ for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+ E = GlobalClasses.end();
+ I != E; ++I) {
+ if (!I->isLeader()) continue;
+ ++NumBitSetDisjointSets;
+
+ unsigned MaxIndex = 0;
+ for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<Metadata *>())
+ MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]);
+ }
+ Sets.emplace_back(I, MaxIndex);
+ }
+ std::sort(Sets.begin(), Sets.end(),
+ [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
+ const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
+ return S1.second < S2.second;
+ });
+
+ // For each disjoint set we found...
+ for (const auto &S : Sets) {
+ // Build the list of bitsets in this disjoint set.
+ std::vector<Metadata *> BitSets;
+ std::vector<GlobalObject *> Globals;
+ for (GlobalClassesTy::member_iterator MI =
+ GlobalClasses.member_begin(S.first);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<Metadata *>())
+ BitSets.push_back(MI->get<Metadata *>());
+ else
+ Globals.push_back(MI->get<GlobalObject *>());
+ }
+
+ // Order bitsets by BitSetNM index for determinism. This ordering is stable
+ // as there is a one-to-one mapping between metadata and indices.
+ std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
+ return BitSetIdIndices[M1] < BitSetIdIndices[M2];
+ });
+
+ // Lower the bitsets in this disjoint set.
+ buildBitSetsFromDisjointSet(BitSets, Globals);
+ }
+
+ allocateByteArrays();
+
+ return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata() {
+ if (!BitSetNM)
+ return false;
+
+ M->eraseNamedMetadata(BitSetNM);
+ return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+ bool Changed = buildBitSets();
+ Changed |= eraseBitSetMetadata();
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
new file mode 100644
index 0000000..8a209a1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -0,0 +1,1880 @@
+//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// An order relation is defined on the set of functions. It is built on a
+// function comparison procedure that returns
+// 0 when the functions are equal,
+// -1 when the left function is less than the right function, and
+// 1 in the opposite case. We need a total ordering, so we must maintain
+// four properties on the set of functions:
+// a <= a (reflexivity)
+// if a <= b and b <= a then a = b (antisymmetry)
+// if a <= b and b <= c then a <= c (transitivity).
+// for all a and b: a <= b or b <= a (totality).
+//
+// The comparison iterates through each instruction in each basic block.
+// Functions are kept in a binary tree, and for each new function F we
+// perform a lookup in that tree.
+// In practice it works the following way:
+// -- We define a Function* container class with a custom "operator<"
+// (FunctionPtr).
+// -- "FunctionPtr" instances are stored in a std::set, so every
+// std::set::insert operation completes in log(N) time.
+//
+// As an optimization, a hash of the function structure is calculated first, and
+// two functions are only compared if they have the same hash. This hash is
+// cheap to compute, and has the property that if function F == G according to
+// the comparison function, then hash(F) == hash(G). This consistency property
+// is critical to ensuring all possible merging opportunities are exploited.
+// Collisions in the hash affect the speed of the pass but not the correctness
+// or determinism of the resulting transformation.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to look through bitcasts.
+//
+// * Compare complex types with pointer types inside.
+// * Compare cross-reference cases.
+// * Compare complex expressions.
+//
+// All three issues above can be described as the ability to prove that
+// fA == fB == fE == fF == fG in the example below:
+//
+// void fA() {
+// fB();
+// }
+// void fB() {
+// fA();
+// }
+//
+// void fE() {
+// fF();
+// }
+// void fF() {
+// fG();
+// }
+// void fG() {
+// fE();
+// }
+//
+// The simplest cross-reference case (fA <--> fB) was implemented in previous
+// versions of MergeFunctions, but it appeared in only two function pairs in
+// the test-suite (which contains >50k functions). The ability to detect
+// complex cross-referencing (e.g. A->B->C->D->A) would cover many more
+// cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mergefunc"
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+static cl::opt<unsigned> NumFunctionsForSanityCheck(
+ "mergefunc-sanity",
+ cl::desc("How many functions in module could be used for "
+ "MergeFunctions pass sanity check. "
+ "'0' disables this check. Works only with '-debug' key."),
+ cl::init(0), cl::Hidden);
+
+namespace {
+
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals. This
+/// state must be preserved throughout the pass, because Functions and other
+/// globals need to maintain their relative order. Globals are assigned a number
+/// when they are first visited. This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+ struct Config : ValueMapConfig<GlobalValue*> {
+ enum { FollowRAUW = false };
+ };
+ // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
+ // occurs, the mapping does not change. Tracking changes is unnecessary, and
+ // also problematic for weak symbols (which may be overwritten).
+ typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+ ValueNumberMap GlobalNumbers;
+ // The next unused serial number to assign to a global.
+ uint64_t NextNumber;
+ public:
+ GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+ uint64_t getNumber(GlobalValue* Global) {
+ ValueNumberMap::iterator MapIter;
+ bool Inserted;
+ std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+ if (Inserted)
+ NextNumber++;
+ return MapIter->second;
+ }
+ void clear() {
+ GlobalNumbers.clear();
+ }
+};
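+
+// A minimal usage sketch (A and B stand for two distinct globals): numbers
+// are handed out in first-visit order, so
+//
+//   GlobalNumberState GN;
+//   GN.getNumber(A); // 0
+//   GN.getNumber(B); // 1
+//   GN.getNumber(A); // still 0; FollowRAUW is off, so merging never
+//                    // renumbers
+//
+// which is exactly the property cmpGlobalValues below relies on.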
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. DataLayout is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const Function *F1, const Function *F2,
+ GlobalNumberState* GN)
+ : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
+
+ /// Test whether the two functions have equivalent behaviour.
+ int compare();
+ /// Hash a function. Equivalent functions will have the same hash, and unequal
+ /// functions will have different hashes with high probability.
+ typedef uint64_t FunctionHash;
+ static FunctionHash functionHash(Function &);
+
+private:
+ /// Test whether two basic blocks have equivalent behaviour.
+ int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
+
+ /// Constants comparison.
+ /// It is analogous to a lexicographical comparison between hypothetical
+ /// numbers of the following format:
+ /// <bitcastability-trait><raw-bit-contents>
+ ///
+ /// 1. Bitcastability.
+ /// Check whether L's type can be losslessly bitcasted to R's type.
+ /// At this stage, if a lossless bitcast is not possible, the method
+ /// returns -1 or 1, thus also defining which type is greater in the
+ /// context of bitcastability.
+ /// Stage 0: If the types are equal in terms of cmpTypes, then we can go
+ /// straight to the contents comparison.
+ /// If the types differ, remember the types comparison result and
+ /// check whether we can still bitcast the types.
+ /// Stage 1: Types that satisfy the isFirstClassType condition are always
+ /// greater than others.
+ /// Stage 2: Vectors are greater than non-vectors.
+ /// If both types are vectors, then the vector with the greater
+ /// bitwidth is greater.
+ /// If both types are vectors with the same bitwidth, then the types
+ /// are bitcastable, and we can skip the other stages and go to the
+ /// contents comparison.
+ /// Stage 3: Pointer types are greater than non-pointers. If both types are
+ /// pointers of the same address space, go to the contents
+ /// comparison. For different address spaces: the pointer with the
+ /// greater address space is greater.
+ /// Stage 4: The types are neither vectors nor pointers, and they differ.
+ /// We don't know how to bitcast them, so we had better not do it,
+ /// and instead return the types comparison result (so it determines
+ /// the relationship among constants we don't know how to bitcast).
+ ///
+ /// Just for clarity, here is how the set of constants could look
+ /// on a single-dimension axis:
+ ///
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ /// Where: NFCT - Not a FirstClassType
+ /// FCT - FirstClassType
+ ///
+ /// 2. Compare raw contents.
+ /// This stage ignores types and only compares the bits of L and R.
+ /// Returns 0 if L and R have equivalent contents,
+ /// -1 or 1 if the values differ.
+ /// Pretty trivial:
+ /// 2.1. If the contents are numbers, compare the numbers.
+ /// Ints with greater bitwidth are greater. Ints with the same
+ /// bitwidth are compared by their contents.
+ /// 2.2. "And so on". To avoid discrepancies with these comments, it is
+ /// best to read the implementation itself.
+ /// 3. Back to the overall picture: here is how the ordered set of
+ /// constants will look:
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ ///
+ /// Now look, what could be inside [FCT, "others"], for example:
+ /// [FCT, "others"] =
+ /// [
+ /// [double 0.1], [double 1.23],
+ /// [i32 1], [i32 2],
+ /// { double 1.0 }, ; StructTyID, NumElements = 1
+ /// { i32 1 }, ; StructTyID, NumElements = 1
+ /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
+ /// { i32 1, double 1 } ; StructTyID, NumElements = 2
+ /// ]
+ ///
+ /// Let's explain the order. Floating-point numbers are less than integers
+ /// simply because of cmpType terms: FloatTyID < IntegerTyID.
+ /// Floats (with the same fltSemantics) are sorted according to their value.
+ /// Then come the integers, which, like the floats, are easily sorted
+ /// among each other.
+ /// Structures are grouped at the tail, again because of their
+ /// TypeID: StructTyID > IntegerTyID > FloatTyID.
+ /// Structures with a greater number of elements are greater, and structures
+ /// whose greater elements come first are greater.
+ /// The same logic applies to vectors, arrays and other complex types.
+ ///
+ /// Bitcastable constants.
+ /// Let's assume that some constant belongs to some group of
+ /// "so-called-equal" values with different types, and at the same time
+ /// belongs to another group of constants with equal types
+ /// and "really" equal values.
+ ///
+ /// Now, prove that this is impossible:
+ ///
+ /// If constant A with type TyA is bitcastable to B with type TyB, then:
+ /// 1. All constants with types equal to TyA are bitcastable to B. Since
+ /// those must be vectors (if TyA is a vector), pointers
+ /// (if TyA is a pointer), or else (if TyA equals TyB), those types must
+ /// be equal to TyB.
+ /// 2. All constants with types that are non-equal but bitcastable to TyA
+ /// are bitcastable to B, again because we only allow this for vectors
+ /// and pointers. This statement can be expanded as follows:
+ /// 2.1. All vectors with the same bitwidth as vector A have the same
+ /// bitwidth as vector B, and thus are bitcastable to B as well.
+ /// 2.2. All pointers of the same address space, no matter what they point
+ /// to, are bitcastable to each other. So if C is a pointer, it can be
+ /// bitcasted to A and to B.
+ /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+ /// QED.
+ ///
+ /// In other words, for pointers and vectors we ignore the top-level type
+ /// and look at their particular properties (bit-width for vectors,
+ /// address space for pointers).
+ /// If these properties are equal, compare their contents.
+ int cmpConstants(const Constant *L, const Constant *R);
+
+ /// Compares two global values by number. Uses the GlobalNumberState to
+ /// identify the same globals across function calls.
+ int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ /// Comparison order:
+ /// Stage 0: A value that is the function itself is always greater than
+ /// others. If the left and right values are references to their
+ /// own functions, then they are equal.
+ /// Stage 1: Constants are greater than non-constants.
+ /// If both left and right are constants, then the result of
+ /// cmpConstants is used as the cmpValues result.
+ /// Stage 2: InlineAsm instances are greater than others. If both left and
+ /// right are InlineAsm instances, the InlineAsm* pointers are cast
+ /// to integers and compared as numbers.
+ /// Stage 3: For all other cases we compare the order in which we met these
+ /// values in their functions. If the right value was met first
+ /// during scanning, then the left value is greater.
+ /// In other words, we compare serial numbers; for details see the
+ /// comments for sn_mapL and sn_mapR.
+ int cmpValues(const Value *L, const Value *R);
+
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ /// Stages are listed in "most significant stage first" order:
+ /// At each stage below, we compare some left and right operation
+ /// parts. If the parts are non-equal, we assign the parts comparison
+ /// result to the operation comparison result and exit from the method.
+ /// Otherwise we proceed to the next stage.
+ /// Stages:
+ /// 1. Operations opcodes. Compared as numbers.
+ /// 2. Number of operands.
+ /// 3. Operation types. Compared with cmpType method.
+ /// 4. Compare operation subclass optional data as stream of bytes:
+ /// just convert it to integers and call cmpNumbers.
+ /// 5. Compare operand types with cmpType, in most significant operand
+ /// first order.
+ /// 6. Last stage. Check operations for some specific attributes.
+ /// For example, for Load it would be:
+ /// 6.1.Load: volatile (as boolean flag)
+ /// 6.2.Load: alignment (as integer numbers)
+ /// 6.3.Load: synch-scope (as integer numbers)
+ /// 6.4.Load: range metadata (as integer numbers)
+ /// At this stage it is better to read the code, since it is no more than
+ /// 10-15 lines per particular instruction and can change occasionally.
+ int cmpOperations(const Instruction *L, const Instruction *R) const;
+
+ /// Compare two GEPs for equivalent pointer arithmetic.
+ /// Parts to be compared for each comparison stage,
+ /// most significant stage first:
+ /// 1. Address space. As numbers.
+ /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
+ /// 3. Pointer operand type (using cmpType method).
+ /// 4. Number of operands.
+ /// 5. Compare operands, using cmpValues method.
+ int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
+ int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ }
+
+ /// cmpType - compares two types and
+ /// defines a total ordering on the set of types.
+ ///
+ /// Return values:
+ /// 0 if types are equal,
+ /// -1 if Left is less than Right,
+ /// +1 if Left is greater than Right.
+ ///
+ /// Description:
+ /// The comparison is broken into stages. As in a lexicographical
+ /// comparison, a stage coming first has higher priority.
+ /// At each stage of the explanation, keep the total ordering properties
+ /// in mind.
+ ///
+ /// 0. Before comparison we coerce pointer types in address space 0 to
+ /// integer.
+ /// We also don't bother when the left and right types are the same, and
+ /// just return 0 in this case.
+ ///
+ /// 1. If the types are of different kinds (different type IDs),
+ /// return the result of the type ID comparison, treating the IDs as
+ /// numbers.
+ /// 2. If types are integers, check that they have the same width. If they
+ /// are vectors, check that they have the same count and subtype.
+ /// 3. Types have the same ID, so check whether they are one of:
+ /// * Void
+ /// * Float
+ /// * Double
+ /// * X86_FP80
+ /// * FP128
+ /// * PPC_FP128
+ /// * Label
+ /// * Metadata
+ /// We can treat these types as equal whenever their IDs are the same.
+ /// 4. If Left and Right are pointers, return the result of the address
+ /// space comparison (a numbers comparison). We can treat pointer types of
+ /// the same address space as equal.
+ /// 5. If the types are complex,
+ /// then both Left and Right are expanded and their element types are
+ /// checked in the same way. If we get Res != 0 at some stage, return it.
+ /// Otherwise return 0.
+ /// 6. All other cases are llvm_unreachable.
+ int cmpTypes(Type *TyL, Type *TyR) const;
+
+ int cmpNumbers(uint64_t L, uint64_t R) const;
+ int cmpAPInts(const APInt &L, const APInt &R) const;
+ int cmpAPFloats(const APFloat &L, const APFloat &R) const;
+ int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+ int cmpMem(StringRef L, StringRef R) const;
+ int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+ int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+ int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
+
+ // The two functions undergoing comparison.
+ const Function *FnL, *FnR;
+
+ /// Assign serial numbers to values from left function, and values from
+ /// right function.
+ /// Explanation:
+ /// While comparing functions we need to compare the values we meet on the
+ /// left and right sides.
+ /// It is easy to sort things out for external values: they simply need to
+ /// be the same value on the left and on the right.
+ /// But for local values (those introduced inside the function body)
+ /// we have to ensure that they were introduced at exactly the same place
+ /// and play the same role.
+ /// We assign a serial number to each value the first time we meet it.
+ /// Values met at the same place receive the same serial numbers.
+ /// A few points about the values assigned to BBs and other implementation
+ /// choices are worth explaining (see below).
+ ///
+ /// 1. Safety of BB reordering.
+ /// It is safe to change the order of BasicBlocks in a function.
+ /// The relationship with other functions and the serial numbering will
+ /// not change in this case.
+ /// As follows from FunctionComparator::compare(), we do a CFG walk: we
+ /// start from the entry and then take each terminator. So it does not
+ /// matter how the BBs are actually ordered in the function. And since
+ /// cmpValues is called during this walk, the numbering depends only on
+ /// where the BBs sit inside the CFG, so we get the same numbering.
+ ///
+ /// 2. Impossibility to use dominance properties of values.
+ /// If we compare two instruction operands, the first being a use of local
+ /// variable AL from function FL and the second a use of local variable AR
+ /// from FR, we could compare their origins and check whether they are
+ /// defined at the same place.
+ /// But we are still unable to compare the operands of PHI nodes, since
+ /// those could be operands from further BBs we have not scanned yet.
+ /// So it is impossible to use dominance properties in general.
+ DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+ // The global state we will use
+ GlobalNumberState* GlobalNumbers;
+};
+
+class FunctionNode {
+ mutable AssertingVH<Function> F;
+ FunctionComparator::FunctionHash Hash;
+public:
+ // Note the hash is recalculated potentially multiple times, but it is cheap.
+ FunctionNode(Function *F)
+ : F(F), Hash(FunctionComparator::functionHash(*F)) {}
+ Function *getFunc() const { return F; }
+ FunctionComparator::FunctionHash getHash() const { return Hash; }
+
+ /// Replace the reference to the function F by the function G, assuming their
+ /// implementations are equal.
+ void replaceBy(Function *G) const {
+ F = G;
+ }
+
+ void release() { F = nullptr; }
+};
+} // end anonymous namespace
+
+int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
+ if (L < R) return -1;
+ if (L > R) return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
+ if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ return Res;
+ if (L.ugt(R)) return 1;
+ if (R.ugt(L)) return -1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
+ // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
+ return Res;
+ return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
+ // To prevent a heavy comparison, compare sizes first.
+ if (int Res = cmpNumbers(L.size(), R.size()))
+ return Res;
+
+ // Compare strings lexicographically only when it is necessary: only when
+ // strings are equal in size.
+ return L.compare(R);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeSet L,
+ const AttributeSet R) const {
+ if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+ return Res;
+
+ for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
+ AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+ RE = R.end(i);
+ for (; LI != LE && RI != RE; ++LI, ++RI) {
+ Attribute LA = *LI;
+ Attribute RA = *RI;
+ if (LA < RA)
+ return -1;
+ if (RA < LA)
+ return 1;
+ }
+ if (LI != LE)
+ return 1;
+ if (RI != RE)
+ return -1;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+ const MDNode* R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+ const Instruction *R) const {
+ ImmutableCallSite LCS(L);
+ ImmutableCallSite RCS(R);
+
+ assert(LCS && RCS && "Must be calls or invokes!");
+ assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+ auto OBL = LCS.getOperandBundleAt(i);
+ auto OBR = RCS.getOperandBundleAt(i);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant can be losslessly bitcasted
+/// to the R type.
+/// 2. Compare constant contents.
+/// For more details see declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+ Type *TyL = L->getType();
+ Type *TyR = R->getType();
+
+ // Check whether the types are bitcastable. This part is just a refactored
+ // Type::canLosslesslyBitCastTo method, but instead of returning true/false
+ // it also packs into the result which type is "less" for us.
+ int TypesRes = cmpTypes(TyL, TyR);
+ if (TypesRes != 0) {
+ // Types are different, but check whether we can bitcast them.
+ if (!TyL->isFirstClassType()) {
+ if (TyR->isFirstClassType())
+ return -1;
+ // Neither TyL nor TyR are values of first class type. Return the result
+ // of comparing the types.
+ return TypesRes;
+ }
+ if (!TyR->isFirstClassType()) {
+ if (TyL->isFirstClassType())
+ return 1;
+ return TypesRes;
+ }
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ unsigned TyLWidth = 0;
+ unsigned TyRWidth = 0;
+
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
+ TyLWidth = VecTyL->getBitWidth();
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
+ TyRWidth = VecTyR->getBitWidth();
+
+ if (TyLWidth != TyRWidth)
+ return cmpNumbers(TyLWidth, TyRWidth);
+
+ // Zero bit-width means neither TyL nor TyR are vectors.
+ if (!TyLWidth) {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+ if (PTyL && PTyR) {
+ unsigned AddrSpaceL = PTyL->getAddressSpace();
+ unsigned AddrSpaceR = PTyR->getAddressSpace();
+ if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+ return Res;
+ }
+ if (PTyL)
+ return 1;
+ if (PTyR)
+ return -1;
+
+ // TyL and TyR aren't vectors, nor pointers. We don't know how to
+ // bitcast them.
+ return TypesRes;
+ }
+ }
+
+ // OK, types are bitcastable, now check constant contents.
+
+ if (L->isNullValue() && R->isNullValue())
+ return TypesRes;
+ if (L->isNullValue() && !R->isNullValue())
+ return 1;
+ if (!L->isNullValue() && R->isNullValue())
+ return -1;
+
+ auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
+ if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+ return Res;
+
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+ // endianness. This isn't a problem though, because the endianness of a
+ // module will affect the order of the constants, but this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
+ switch (L->getValueID()) {
+ case Value::UndefValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
+ case Value::ConstantIntVal: {
+ const APInt &LInt = cast<ConstantInt>(L)->getValue();
+ const APInt &RInt = cast<ConstantInt>(R)->getValue();
+ return cmpAPInts(LInt, RInt);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+ const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+ return cmpAPFloats(LAPF, RAPF);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *LA = cast<ConstantArray>(L);
+ const ConstantArray *RA = cast<ConstantArray>(R);
+ uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+ uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+ cast<Constant>(RA->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *LS = cast<ConstantStruct>(L);
+ const ConstantStruct *RS = cast<ConstantStruct>(R);
+ unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (unsigned i = 0; i != NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+ cast<Constant>(RS->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *LV = cast<ConstantVector>(L);
+ const ConstantVector *RV = cast<ConstantVector>(R);
+ unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+ cast<Constant>(RV->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *LE = cast<ConstantExpr>(L);
+ const ConstantExpr *RE = cast<ConstantExpr>(R);
+ unsigned NumOperandsL = LE->getNumOperands();
+ unsigned NumOperandsR = RE->getNumOperands();
+ if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+ return Res;
+ for (unsigned i = 0; i < NumOperandsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+ cast<Constant>(RE->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function* F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+ for(BasicBlock &BB : F->getBasicBlockList()) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
+ }
+ default: // Unknown constant, abort.
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+ return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
+}
+
+/// cmpType - compares two types and
+/// defines a total ordering on the set of types.
+/// See method declaration comments for more details.
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
+
+ if (TyL == TyR)
+ return 0;
+
+ if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+ return Res;
+
+ switch (TyL->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ case Type::IntegerTyID:
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
+ if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
+ return Res;
+ return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
+ }
+ // TyL == TyR would have returned 0 earlier, because types are uniqued.
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ case Type::TokenTyID:
+ return 0;
+
+ case Type::PointerTyID: {
+ assert(PTyL && PTyR && "Both types must be pointers here.");
+ return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+ }
+
+ case Type::StructTyID: {
+ StructType *STyL = cast<StructType>(TyL);
+ StructType *STyR = cast<StructType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+ if (STyL->isPacked() != STyR->isPacked())
+ return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+ for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::FunctionTyID: {
+ FunctionType *FTyL = cast<FunctionType>(TyL);
+ FunctionType *FTyR = cast<FunctionType>(TyR);
+ if (FTyL->getNumParams() != FTyR->getNumParams())
+ return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
+
+ if (FTyL->isVarArg() != FTyR->isVarArg())
+ return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
+
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
+ return Res;
+
+ for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::ArrayTyID: {
+ ArrayType *ATyL = cast<ArrayType>(TyL);
+ ArrayType *ATyR = cast<ArrayType>(TyR);
+ if (ATyL->getNumElements() != ATyR->getNumElements())
+ return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
+ return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
+ }
+ }
+}
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+// Read method declaration comments for more details.
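+//
+// For example, two "load volatile i32" instructions compare equal here even
+// when they load from different pointers: this routine only checks opcode,
+// types, and instruction-specific state, while the operand values themselves
+// are compared separately by cmpValues in cmpBasicBlocks.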
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R) const {
+  // Differences from Instruction::isSameOperationAs:
+  // * replace type comparison with calls to cmpTypes.
+  // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top.
+  // * because of the above, we don't test for the tail bit on calls later on.
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+
+ if (int Res = cmpTypes(L->getType(), R->getType()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+ R->getRawSubclassOptionalData()))
+ return Res;
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ if (int Res =
+ cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
+ return Res;
+ }
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+ if (int Res =
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ return Res;
+ }
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+ if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
+ return Res;
+ return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+ if (int Res =
+ cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
+ }
+ if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+ return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+ if (const CallInst *CI = dyn_cast<CallInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<CallInst>(R)->getCallingConv()))
+ return Res;
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(CI, R))
+ return Res;
+ return cmpRangeMetadata(
+ CI->getMetadata(LLVMContext::MD_range),
+ cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(II->getCallingConv(),
+ cast<InvokeInst>(R)->getCallingConv()))
+ return Res;
+ if (int Res =
+ cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(II, R))
+ return Res;
+ return cmpRangeMetadata(
+ II->getMetadata(LLVMContext::MD_range),
+ cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = IVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = EVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+ if (int Res =
+ cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
+ }
+
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+ if (int Res = cmpNumbers(CXI->isVolatile(),
+ cast<AtomicCmpXchgInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->isWeak(),
+ cast<AtomicCmpXchgInst>(R)->isWeak()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ return Res;
+ return cmpNumbers(CXI->getSynchScope(),
+ cast<AtomicCmpXchgInst>(R)->getSynchScope());
+ }
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+ if (int Res = cmpNumbers(RMWI->getOperation(),
+ cast<AtomicRMWInst>(R)->getOperation()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->isVolatile(),
+ cast<AtomicRMWInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->getOrdering(),
+ cast<AtomicRMWInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(RMWI->getSynchScope(),
+ cast<AtomicRMWInst>(R)->getSynchScope());
+ }
+ return 0;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+// Read method declaration comments for more details.
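+//
+// For example, "getelementptr i32, i32* %p, i64 1" and "getelementptr i8,
+// i8* %q, i64 4" both accumulate a constant byte offset of 4, so they compare
+// equal here; their pointer operands are compared separately by the caller.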
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
+ const GEPOperator *GEPR) {
+
+ unsigned int ASL = GEPL->getPointerAddressSpace();
+ unsigned int ASR = GEPR->getPointerAddressSpace();
+
+ if (int Res = cmpNumbers(ASL, ASR))
+ return Res;
+
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
+ return Res;
+
+ if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+ return Res;
+
+ for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+ if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+ return Res;
+ }
+
+ return 0;
+}
+
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+ const InlineAsm *R) const {
+  // InlineAsm instances are uniqued. If they are the same pointer, they are
+  // trivially the same; otherwise compare the fields.
+ if (L == R)
+ return 0;
+ if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
+ return Res;
+ if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
+ return Res;
+ if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
+ return Res;
+ if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
+ return Res;
+ if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
+ return Res;
+ if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
+ return Res;
+ llvm_unreachable("InlineAsm blocks were not uniqued.");
+ return 0;
+}
+
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
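+///
+/// For example, the first time %a (left) is compared against %x (right), both
+/// receive serial number 0 and compare equal; if %a later reappears opposite
+/// a different value %y (which would receive serial number 1), the serial
+/// numbers 0 and 1 no longer match and a nonzero ordering is returned.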
+int FunctionComparator::cmpValues(const Value *L, const Value *R) {
+ // Catch self-reference case.
+ if (L == FnL) {
+ if (R == FnR)
+ return 0;
+ return -1;
+ }
+ if (R == FnR) {
+ if (L == FnL)
+ return 0;
+ return 1;
+ }
+
+ const Constant *ConstL = dyn_cast<Constant>(L);
+ const Constant *ConstR = dyn_cast<Constant>(R);
+ if (ConstL && ConstR) {
+ if (L == R)
+ return 0;
+ return cmpConstants(ConstL, ConstR);
+ }
+
+ if (ConstL)
+ return 1;
+ if (ConstR)
+ return -1;
+
+ const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+ const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+ if (InlineAsmL && InlineAsmR)
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
+ if (InlineAsmL)
+ return 1;
+ if (InlineAsmR)
+ return -1;
+
+ auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+ RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+
+ return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}
+
+// Test whether two basic blocks have equivalent behaviour.
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) {
+ BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
+ BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
+
+ do {
+ if (int Res = cmpValues(&*InstL, &*InstR))
+ return Res;
+
+ const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
+ const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR);
+
+ if (GEPL && !GEPR)
+ return 1;
+ if (GEPR && !GEPL)
+ return -1;
+
+ if (GEPL && GEPR) {
+ if (int Res =
+ cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
+ return Res;
+ if (int Res = cmpGEPs(GEPL, GEPR))
+ return Res;
+ } else {
+ if (int Res = cmpOperations(&*InstL, &*InstR))
+ return Res;
+ assert(InstL->getNumOperands() == InstR->getNumOperands());
+
+ for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
+ Value *OpL = InstL->getOperand(i);
+ Value *OpR = InstR->getOperand(i);
+ if (int Res = cmpValues(OpL, OpR))
+ return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
+ }
+ }
+
+ ++InstL, ++InstR;
+ } while (InstL != InstLE && InstR != InstRE);
+
+ if (InstL != InstLE && InstR == InstRE)
+ return 1;
+ if (InstL == InstLE && InstR != InstRE)
+ return -1;
+ return 0;
+}
+
+// Test whether the two functions have equivalent behaviour.
+int FunctionComparator::compare() {
+ sn_mapL.clear();
+ sn_mapR.clear();
+
+ if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
+ return Res;
+
+ if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
+ return Res;
+
+ if (FnL->hasGC()) {
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
+ return Res;
+
+ if (FnL->hasSection()) {
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
+ return Res;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
+ return Res;
+
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
+ return Res;
+
+ assert(FnL->arg_size() == FnR->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
+ ArgRI = FnR->arg_begin(),
+ ArgLE = FnL->arg_end();
+ ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
+ llvm_unreachable("Arguments repeat!");
+ }
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
+
+ FnLBBs.push_back(&FnL->getEntryBlock());
+ FnRBBs.push_back(&FnR->getEntryBlock());
+
+ VisitedBBs.insert(FnLBBs[0]);
+ while (!FnLBBs.empty()) {
+ const BasicBlock *BBL = FnLBBs.pop_back_val();
+ const BasicBlock *BBR = FnRBBs.pop_back_val();
+
+ if (int Res = cmpValues(BBL, BBR))
+ return Res;
+
+ if (int Res = cmpBasicBlocks(BBL, BBR))
+ return Res;
+
+ const TerminatorInst *TermL = BBL->getTerminator();
+ const TerminatorInst *TermR = BBR->getTerminator();
+
+ assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
+ for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
+ continue;
+
+ FnLBBs.push_back(TermL->getSuccessor(i));
+ FnRBBs.push_back(TermR->getSuccessor(i));
+ }
+ }
+ return 0;
+}
+
+namespace {
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
+// hash of a sequence of 64-bit ints, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+ void add(uint64_t V) {
+ Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+ }
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it
+// useful as a coarse filter when merging functions that are the same modulo
+// constants and call targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+  // accumulating the hash of the function "structure" (BB and opcode sequence).
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header, as otherwise the partition of
+    // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
+ H.add(45798);
+ for (auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const TerminatorInst *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
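+/// For example, two internal functions whose bodies differ only in the
+/// pointee types of their pointer arguments (say, i32* vs. float*) lower to
+/// identical machine code, so one can be discarded and its callers routed
+/// through a bitcast of the survivor.
+///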
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions()
+ : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
+ HasGlobalAliases(false) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+private:
+ // The function comparison operator is provided here so that FunctionNodes do
+ // not need to become larger with another pointer.
+ class FunctionNodeCmp {
+ GlobalNumberState* GlobalNumbers;
+ public:
+ FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+ bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
+ // Order first by hashes, then full function comparison.
+ if (LHS.getHash() != RHS.getHash())
+ return LHS.getHash() < RHS.getHash();
+ FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
+ return FCmp.compare() == -1;
+ }
+ };
+ typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+
+ GlobalNumberState GlobalNumbers;
+
+ /// A work queue of functions that may have been modified and should be
+ /// analyzed again.
+ std::vector<WeakVH> Deferred;
+
+  /// Checks that the order relation introduced on the set of functions is a
+  /// consistent total order.
+  /// Returns true if the sanity check passed, and false if it failed.
+ bool doSanityCheck(std::vector<WeakVH> &Worklist);
+
+  /// Insert a function into the FnTree, or merge it away if it's equal to one
+  /// that's already present.
+ bool insert(Function *NewFunction);
+
+ /// Remove a Function from the FnTree and queue it up for a second sweep of
+ /// analysis.
+ void remove(Function *F);
+
+ /// Find the functions that use this Value and remove them from FnTree and
+ /// queue the functions.
+ void removeUsers(Value *V);
+
+ /// Replace all direct calls of Old with calls of New. Will bitcast New if
+ /// necessary to make types match.
+ void replaceDirectCallers(Function *Old, Function *New);
+
+ /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+ /// be converted into a thunk. In either case, it should never be visited
+ /// again.
+ void mergeTwoFunctions(Function *F, Function *G);
+
+ /// Replace G with a thunk or an alias to F. Deletes G.
+ void writeThunkOrAlias(Function *F, Function *G);
+
+ /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+ /// of G with bitcast(F). Deletes G.
+ void writeThunk(Function *F, Function *G);
+
+ /// Replace G with an alias to F. Deletes G.
+ void writeAlias(Function *F, Function *G);
+
+ /// Replace function F with function G in the function tree.
+ void replaceFunctionInTree(const FunctionNode &FN, Function *G);
+
+ /// The set of all distinct functions. Use the insert() and remove() methods
+ /// to modify it. The map allows efficient lookup and deferring of Functions.
+ FnTreeType FnTree;
+ // Map functions to the iterators of the FunctionNode which contains them
+ // in the FnTree. This must be updated carefully whenever the FnTree is
+ // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
+ // dangling iterators into FnTree. The invariant that preserves this is that
+ // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
+ ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
+
+ /// Whether or not the target supports global aliases.
+ bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
+ if (const unsigned Max = NumFunctionsForSanityCheck) {
+ unsigned TripleNumber = 0;
+ bool Valid = true;
+
+ dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
+
+ unsigned i = 0;
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
+ I != E && i < Max; ++I, ++i) {
+ unsigned j = i;
+ for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
+ Function *F1 = cast<Function>(*I);
+ Function *F2 = cast<Function>(*J);
+ int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
+ int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
+
+ // If F1 <= F2, then F2 >= F1, otherwise report failure.
+ if (Res1 != -Res2) {
+ dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
+ << "\n";
+ F1->dump();
+ F2->dump();
+ Valid = false;
+ }
+
+ if (Res1 == 0)
+ continue;
+
+ unsigned k = j;
+ for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
+ ++k, ++K, ++TripleNumber) {
+ if (K == J)
+ continue;
+
+ Function *F3 = cast<Function>(*K);
+ int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
+ int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
+
+ bool Transitive = true;
+
+ if (Res1 != 0 && Res1 == Res4) {
+ // F1 > F2, F2 > F3 => F1 > F3
+ Transitive = Res3 == Res1;
+ } else if (Res3 != 0 && Res3 == -Res4) {
+ // F1 > F3, F3 > F2 => F1 > F2
+ Transitive = Res3 == Res1;
+ } else if (Res4 != 0 && -Res3 == Res4) {
+ // F2 > F3, F3 > F1 => F2 > F1
+ Transitive = Res4 == -Res1;
+ }
+
+ if (!Transitive) {
+ dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
+ << TripleNumber << "\n";
+ dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
+ << Res4 << "\n";
+ F1->dump();
+ F2->dump();
+ F3->dump();
+ Valid = false;
+ }
+ }
+ }
+ }
+
+ dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
+ return Valid;
+ }
+ return true;
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // All functions in the module, ordered by hash. Functions with a unique
+ // hash value are easily eliminated.
+ std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+ HashedFuncs;
+ for (Function &Func : M) {
+ if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+ HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+ }
+ }
+
+ std::stable_sort(
+ HashedFuncs.begin(), HashedFuncs.end(),
+ [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
+ const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
+ return a.first < b.first;
+ });
+
+ auto S = HashedFuncs.begin();
+ for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
+ // If the hash value matches the previous value or the next one, we must
+ // consider merging it. Otherwise it is dropped and never considered again.
+ if ((I != S && std::prev(I)->first == I->first) ||
+ (std::next(I) != IE && std::next(I)->first == I->first) ) {
+ Deferred.push_back(WeakVH(I->second));
+ }
+ }
+
+ do {
+ std::vector<WeakVH> Worklist;
+ Deferred.swap(Worklist);
+
+ DEBUG(doSanityCheck(Worklist));
+
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+ DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden()) {
+ Changed |= insert(F);
+ }
+ }
+
+ // Insert only weak functions and merge them. By doing these second we
+ // create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // thunks to it which are identical but not mergeable.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden()) {
+ Changed |= insert(F);
+ }
+ }
+ DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
+ } while (!Deferred.empty());
+
+ FnTree.clear();
+ GlobalNumbers.clear();
+
+ return Changed;
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+ Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+ for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
+ Use *U = &*UI;
+ ++UI;
+ CallSite CS(U->getUser());
+ if (CS && CS.isCallee(U)) {
+ // Transfer the called function's attributes to the call site. Due to the
+ // bitcast we will 'lose' ABI changing attributes because the 'called
+ // function' is no longer a Function* but the bitcast. Code that looks up
+ // the attributes from the called function will fail.
+
+ // FIXME: This is not actually true, at least not anymore. The callsite
+ // will always have the same ABI affecting attributes as the callee,
+ // because otherwise the original input has UB. Note that Old and New
+ // always have matching ABI, so no attributes need to be changed.
+ // Transferring other attributes may help other optimizations, but that
+ // should be done uniformly and not in this ad-hoc way.
+ auto &Context = New->getContext();
+ auto NewFuncAttrs = New->getAttributes();
+ auto CallSiteAttrs = CS.getAttributes();
+
+ CallSiteAttrs = CallSiteAttrs.addAttributes(
+ Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
+
+ for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
+ AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
+ if (Attrs.getNumSlots())
+ CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
+ }
+
+ CS.setAttributes(CallSiteAttrs);
+
+ remove(CS.getInstruction()->getParent()->getParent());
+ U->set(BitcastNew);
+ }
+ }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+ G->hasWeakLinkage()) {
+ writeAlias(F, G);
+ return;
+ }
+ }
+
+ writeThunk(F, G);
+}
+
+// Helper for writeThunk: selects the proper cast operation. It is a bit
+// simpler than CastInst::getCastOpcode, and additionally handles struct
+// types by casting them element-wise.
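+//
+// For example, casting a value of type { i8*, i32 } to { i32*, i32 } recurses
+// element-wise: the pointer element gets a bitcast while the identically
+// typed i32 element is passed through unchanged.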
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy->isStructTy()) {
+ assert(DestTy->isStructTy());
+ assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+ Value *Result = UndefValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+ Value *Element = createCast(
+ Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
+ DestTy->getStructElementType(I));
+
+ Result =
+ Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isStructTy());
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ return Builder.CreateIntToPtr(V, DestTy);
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ return Builder.CreatePtrToInt(V, DestTy);
+ else
+ return Builder.CreateBitCast(V, DestTy);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
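+//
+// For G of type i32 (i32*), the thunk emitted below looks roughly like
+//   define i32 @G(i32* %p) {
+//     %r = tail call i32 @F(i32* %p)
+//     ret i32 %r
+//   }
+// with casts from createCast inserted wherever F's parameter or return types
+// differ from G's. (The value names here are illustrative only.)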
+void MergeFunctions::writeThunk(Function *F, Function *G) {
+ if (!G->mayBeOverridden()) {
+ // Redirect direct callers of G to F.
+ replaceDirectCallers(G, F);
+ }
+
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
+ Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
+
+ SmallVector<Value *, 16> Args;
+ unsigned i = 0;
+ FunctionType *FFTy = F->getFunctionType();
+ for (Argument & AI : NewG->args()) {
+ Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
+ ++i;
+ }
+
+ CallInst *CI = Builder.CreateCall(F, Args);
+ CI->setTailCall();
+ CI->setCallingConv(F->getCallingConv());
+ CI->setAttributes(F->getAttributes());
+ if (NewG->getReturnType()->isVoidTy()) {
+ Builder.CreateRetVoid();
+ } else {
+ Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
+ }
+
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
+}
+
+// Replace G with an alias to F and delete G.
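+// Roughly speaking, G's body is dropped and G's name, linkage, and visibility
+// are re-attached to a GlobalAlias whose aliasee is F, so the two symbols end
+// up sharing one definition.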
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
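+//
+// When both F and G may be overridden (e.g. weak linkage), neither can simply
+// forward to the other, since either definition may be replaced at link time.
+// Instead, F's body is demoted to a new private definition: a fresh function
+// H takes over F's name and linkage, and both G and H become thunks (or
+// aliases) to the now-private body.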
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
+
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
+
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+ if (HasGlobalAliases) {
+ writeAlias(F, G);
+ writeAlias(F, H);
+ } else {
+ writeThunk(F, G);
+ writeThunk(F, H);
+ }
+
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ ++NumDoubleWeak;
+ } else {
+ writeThunkOrAlias(F, G);
+ }
+
+ ++NumFunctionsMerged;
+}
+
+/// Replace function F by function G.
+void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
+ Function *G) {
+ Function *F = FN.getFunc();
+ assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
+ "The two functions must be equal");
+
+ auto I = FNodesInTree.find(F);
+ assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
+ assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
+
+ FnTreeType::iterator IterToFNInFnTree = I->second;
+ assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
+ // Remove F -> FN and insert G -> FN
+ FNodesInTree.erase(I);
+ FNodesInTree.insert({G, IterToFNInFnTree});
+ // Replace F with G in FN, which is stored inside the FnTree.
+ FN.replaceBy(G);
+}
+
+// Insert a function into the FnTree, or merge it away if equal to one that
+// was already inserted.
+bool MergeFunctions::insert(Function *NewFunction) {
+ std::pair<FnTreeType::iterator, bool> Result =
+ FnTree.insert(FunctionNode(NewFunction));
+
+ if (Result.second) {
+ assert(FNodesInTree.count(NewFunction) == 0);
+ FNodesInTree.insert({NewFunction, Result.first});
+ DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
+ return false;
+ }
+
+ const FunctionNode &OldF = *Result.first;
+
+ // Don't merge tiny functions, since it can just end up making the function
+ // larger.
+ // FIXME: Should still merge them if they are unnamed_addr and produce an
+ // alias.
+ if (NewFunction->size() == 1) {
+ if (NewFunction->front().size() <= 2) {
+ DEBUG(dbgs() << NewFunction->getName()
+ << " is to small to bother merging\n");
+ return false;
+ }
+ }
+
+ // Impose a total order (by name) on the replacement of functions. This is
+ // important when operating on more than one module independently to prevent
+ // cycles of thunks calling each other when the modules are linked together.
+ //
+ // When one function is weak and the other is strong there is an order imposed
+ // already. We process strong functions before weak functions.
+ if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) ||
+ (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden()))
+ if (OldF.getFunc()->getName() > NewFunction->getName()) {
+ // Swap the two functions.
+ Function *F = OldF.getFunc();
+ replaceFunctionInTree(*Result.first, NewFunction);
+ NewFunction = F;
+ assert(OldF.getFunc() != F && "Must have swapped the functions.");
+ }
+
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
+
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName()
+ << " == " << NewFunction->getName() << '\n');
+
+ Function *DeleteF = NewFunction;
+ mergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
+
+// Remove a function from FnTree. If it was already in FnTree, add
+// it to Deferred so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+ auto I = FNodesInTree.find(F);
+ if (I != FNodesInTree.end()) {
+ DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n");
+ FnTree.erase(I->second);
+ // I->second has been invalidated, remove it from the FNodesInTree map to
+ // preserve the invariant.
+ FNodesInTree.erase(I);
+ Deferred.emplace_back(F);
+ }
+}
+
+// For each instruction used by the value, remove() the function that contains
+// the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+ std::vector<Value *> Worklist;
+ Worklist.push_back(V);
+ SmallSet<Value*, 8> Visited;
+ Visited.insert(V);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+
+ for (User *U : V->users()) {
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
+ remove(I->getParent()->getParent());
+ } else if (isa<GlobalValue>(U)) {
+ // do nothing
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ for (User *UU : C->users()) {
+          if (Visited.insert(UU).second)
+            Worklist.push_back(UU);
+ }
+ }
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 0000000..0c5c84b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,183 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
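+//
+// For example, given
+//   void f(bool b) { if (b) return; /* ...large body... */ }
+// the entry-block test is duplicated into each caller, and the large body is
+// extracted into a separate function that is invoked only when the branch
+// falls through to it.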
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "partialinlining"
+
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
+namespace {
+ struct PartialInliner : public ModulePass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override { }
+ static char ID; // Pass identification, replacement for typeid
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module& M) override;
+
+ private:
+ Function* unswitchFunction(Function* F);
+ };
+}
+
+char PartialInliner::ID = 0;
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false)
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+Function* PartialInliner::unswitchFunction(Function* F) {
+ // First, verify that this function is an unswitching candidate...
+ BasicBlock *entryBlock = &F->front();
+ BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ if (!BR || BR->isUnconditional())
+ return nullptr;
+
+ BasicBlock* returnBlock = nullptr;
+ BasicBlock* nonReturnBlock = nullptr;
+ unsigned returnCount = 0;
+ for (BasicBlock *BB : successors(entryBlock)) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ returnBlock = BB;
+ returnCount++;
+ } else
+ nonReturnBlock = BB;
+ }
+
+ if (returnCount != 1)
+ return nullptr;
+
+ // Clone the function, so that we can hack away on it.
+ ValueToValueMapTy VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
+ duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(duplicateFunction);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
+
+ // Go ahead and update all uses to the duplicate, so that we can just
+ // use the inliner functionality when we're done hacking.
+ F->replaceAllUsesWith(duplicateFunction);
+
+ // Special hackery is needed with PHI nodes that have inputs from more than
+ // one extracted block. For simplicity, just split the PHIs into a two-level
+ // sequence of PHIs, some of which will go in the extracted region, and some
+ // of which will go outside.
+ BasicBlock* preReturn = newReturnBlock;
+ newReturnBlock = newReturnBlock->splitBasicBlock(
+ newReturnBlock->getFirstNonPHI()->getIterator());
+ BasicBlock::iterator I = preReturn->begin();
+ Instruction *Ins = &newReturnBlock->front();
+ while (I != preReturn->end()) {
+ PHINode* OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi) break;
+
+ PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+ OldPhi->replaceAllUsesWith(retPhi);
+ Ins = newReturnBlock->getFirstNonPHI();
+
+ retPhi->addIncoming(&*I, preReturn);
+ retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+ newEntryBlock);
+ OldPhi->removeIncomingValue(newEntryBlock);
+
+ ++I;
+ }
+ newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+
+ // Gather up the blocks that we're going to extract.
+ std::vector<BasicBlock*> toExtract;
+ toExtract.push_back(newNonReturnBlock);
+ for (Function::iterator FI = duplicateFunction->begin(),
+ FE = duplicateFunction->end(); FI != FE; ++FI)
+ if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+ &*FI != newNonReturnBlock)
+ toExtract.push_back(&*FI);
+
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.recalculate(*duplicateFunction);
+
+ // Extract the body of the if.
+ Function* extractedFunction
+ = CodeExtractor(toExtract, &DT).extractCodeRegion();
+
+ InlineFunctionInfo IFI;
+
+ // Inline the top-level if test into all callers.
+ std::vector<User *> Users(duplicateFunction->user_begin(),
+ duplicateFunction->user_end());
+ for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+ UI != UE; ++UI)
+ if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ InlineFunction(CI, IFI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ InlineFunction(II, IFI);
+
+ // Ditch the duplicate, since we're done with it, and rewrite all remaining
+ // users (function pointers, etc.) back to the original function.
+ duplicateFunction->replaceAllUsesWith(F);
+ duplicateFunction->eraseFromParent();
+
+ ++NumPartialInlined;
+
+ return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+ std::vector<Function*> worklist;
+ worklist.reserve(M.size());
+ for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+ if (!FI->use_empty() && !FI->isDeclaration())
+ worklist.push_back(&*FI);
+
+ bool changed = false;
+ while (!worklist.empty()) {
+ Function* currFunc = worklist.back();
+ worklist.pop_back();
+
+ if (currFunc->use_empty()) continue;
+
+ bool recursive = false;
+ for (User *U : currFunc->users())
+ if (Instruction* I = dyn_cast<Instruction>(U))
+ if (I->getParent()->getParent() == currFunc) {
+ recursive = true;
+ break;
+ }
+ if (recursive) continue;
+
+ if (Function* newFunc = unswitchFunction(currFunc)) {
+ worklist.push_back(newFunc);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
new file mode 100644
index 0000000..9876efa
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -0,0 +1,733 @@
+//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PassManagerBuilder class, which is used to set up a
+// "standard" optimization sequence suitable for languages like C and C++.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/FunctionInfo.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+RunLoopVectorization("vectorize-loops", cl::Hidden,
+ cl::desc("Run the Loop vectorization passes"));
+
+static cl::opt<bool>
+RunSLPVectorization("vectorize-slp", cl::Hidden,
+ cl::desc("Run the SLP vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
+ cl::desc("Run the BB vectorization passes"));
+
+static cl::opt<bool>
+UseGVNAfterVectorization("use-gvn-after-vectorization",
+ cl::init(false), cl::Hidden,
+ cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+
+static cl::opt<bool> ExtraVectorizerPasses(
+ "extra-vectorizer-passes", cl::init(false), cl::Hidden,
+ cl::desc("Run cleanup optimization passes after vectorization."));
+
+static cl::opt<bool> UseNewSROA("use-new-sroa",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable the new, experimental SROA pass"));
+
+static cl::opt<bool>
+RunLoopRerolling("reroll-loops", cl::Hidden,
+ cl::desc("Run the loop rerolling pass"));
+
+static cl::opt<bool>
+RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
+ cl::desc("Run the float2int (float demotion) pass"));
+
+static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
+ cl::Hidden,
+ cl::desc("Run the load combining pass"));
+
+static cl::opt<bool>
+RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
+ cl::init(true), cl::Hidden,
+ cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
+ "vectorizer instead of before"));
+
+static cl::opt<bool> UseCFLAA("use-cfl-aa",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis"));
+
+static cl::opt<bool>
+EnableMLSM("mlsm", cl::init(true), cl::Hidden,
+ cl::desc("Enable motion of merged load and store"));
+
+static cl::opt<bool> EnableLoopInterchange(
+ "enable-loopinterchange", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopInterchange Pass"));
+
+static cl::opt<bool> EnableLoopDistribute(
+ "enable-loop-distribute", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopDistribution Pass"));
+
+static cl::opt<bool> EnableNonLTOGlobalsModRef(
+ "enable-non-lto-gmr", cl::init(true), cl::Hidden,
+ cl::desc(
+ "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
+
+static cl::opt<bool> EnableLoopLoadElim(
+ "enable-loop-load-elim", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopLoadElimination Pass"));
+
+PassManagerBuilder::PassManagerBuilder() {
+ OptLevel = 2;
+ SizeLevel = 0;
+ LibraryInfo = nullptr;
+ Inliner = nullptr;
+ FunctionIndex = nullptr;
+ DisableUnitAtATime = false;
+ DisableUnrollLoops = false;
+ BBVectorize = RunBBVectorization;
+ SLPVectorize = RunSLPVectorization;
+ LoopVectorize = RunLoopVectorization;
+ RerollLoops = RunLoopRerolling;
+ LoadCombine = RunLoadCombine;
+ DisableGVNLoadPRE = false;
+ VerifyInput = false;
+ VerifyOutput = false;
+ MergeFunctions = false;
+ PrepareForLTO = false;
+}
+
+PassManagerBuilder::~PassManagerBuilder() {
+ delete LibraryInfo;
+ delete Inliner;
+}
+
+/// Set of global extensions, automatically added as part of the standard set.
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
+ PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
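+
+// A frontend registers extensions either process-wide (addGlobalExtension) or
+// per builder (addExtension). A hypothetical client might do:
+//   PassManagerBuilder::addGlobalExtension(
+//       PassManagerBuilder::EP_EarlyAsPossible,
+//       [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+//         PM.add(createMyCustomPass()); // createMyCustomPass is hypothetical.
+//       });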
+
+void PassManagerBuilder::addGlobalExtension(
+ PassManagerBuilder::ExtensionPointTy Ty,
+ PassManagerBuilder::ExtensionFn Fn) {
+ GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
+ Extensions.push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
+ legacy::PassManagerBase &PM) const {
+ for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
+ if ((*GlobalExtensions)[i].first == ETy)
+ (*GlobalExtensions)[i].second(*this, PM);
+ for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
+ if (Extensions[i].first == ETy)
+ Extensions[i].second(*this, PM);
+}
+
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+ legacy::PassManagerBase &PM) const {
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ if (UseCFLAA)
+ PM.add(createCFLAAWrapperPass());
+ PM.add(createTypeBasedAAWrapperPass());
+ PM.add(createScopedNoAliasAAWrapperPass());
+}
+
+void PassManagerBuilder::populateFunctionPassManager(
+ legacy::FunctionPassManager &FPM) {
+ addExtensionsToPM(EP_EarlyAsPossible, FPM);
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo)
+ FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+ if (OptLevel == 0) return;
+
+ addInitialAliasAnalysisPasses(FPM);
+
+ FPM.add(createCFGSimplificationPass());
+ if (UseNewSROA)
+ FPM.add(createSROAPass());
+ else
+ FPM.add(createScalarReplAggregatesPass());
+ FPM.add(createEarlyCSEPass());
+ FPM.add(createLowerExpectIntrinsicPass());
+}
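+
+// A typical (hypothetical) driver wires the builder up along these lines,
+// where M is the Module being compiled:
+//   PassManagerBuilder PMB;
+//   PMB.OptLevel = 2;
+//   PMB.Inliner = createFunctionInliningPass(PMB.OptLevel, PMB.SizeLevel);
+//   legacy::PassManager MPM;
+//   PMB.populateModulePassManager(MPM);
+//   MPM.run(M);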
+
+void PassManagerBuilder::populateModulePassManager(
+ legacy::PassManagerBase &MPM) {
+ // Allow forcing function attributes as a debugging and tuning aid.
+ MPM.add(createForceFunctionAttrsLegacyPass());
+
+ // If all optimizations are disabled, just run the always-inline pass and,
+ // if enabled, the function merging pass.
+ if (OptLevel == 0) {
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = nullptr;
+ }
+
+ // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
+ // creates a CGSCC pass manager, but we don't want to add extensions into
+ // that pass manager. To prevent this we insert a no-op module pass to reset
+ // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
+    // builds. The function merging pass, being a module pass itself, serves
+    // the same purpose when it is enabled.
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+ else if (!GlobalExtensions->empty() || !Extensions.empty())
+ MPM.add(createBarrierNoopPass());
+
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+ return;
+ }
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo)
+ MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+ addInitialAliasAnalysisPasses(MPM);
+
+ if (!DisableUnitAtATime) {
+ // Infer attributes about declarations if possible.
+ MPM.add(createInferFunctionAttrsLegacyPass());
+
+ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+
+ MPM.add(createIPSCCPPass()); // IP SCCP
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ // Promote any localized global vars
+ MPM.add(createPromoteMemoryToRegisterPass());
+
+ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+
+ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ addExtensionsToPM(EP_Peephole, MPM);
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ }
+
+ if (EnableNonLTOGlobalsModRef)
+ // We add a module alias analysis pass here. In part due to bugs in the
+ // analysis infrastructure this "works" in that the analysis stays alive
+ // for the entire SCC pass run below.
+ MPM.add(createGlobalsAAWrapperPass());
+
+ // Start of CallGraph SCC passes.
+ if (!DisableUnitAtATime)
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = nullptr;
+ }
+ if (!DisableUnitAtATime)
+ MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ if (OptLevel > 2)
+ MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ // Start of function pass.
+ // Break up aggregate allocas, using SSAUpdater.
+ if (UseNewSROA)
+ MPM.add(createSROAPass());
+ else
+ MPM.add(createScalarReplAggregatesPass(-1, false));
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+ addExtensionsToPM(EP_Peephole, MPM);
+
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createReassociatePass()); // Reassociate expressions
+ // Rotate Loop - disable header duplication at -Oz
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+ MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createLoopDeletionPass()); // Delete dead loops
+ if (EnableLoopInterchange) {
+ MPM.add(createLoopInterchangePass()); // Interchange loops
+ MPM.add(createCFGSimplificationPass());
+ }
+ if (!DisableUnrollLoops)
+ MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+
+ if (OptLevel > 1) {
+ if (EnableMLSM)
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ }
+ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
+ MPM.add(createSCCPPass()); // Constant prop with SCCP
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
+
+ // Run instcombine after redundancy elimination to exploit opportunities
+ // opened up by them.
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ MPM.add(createJumpThreadingPass()); // Thread jumps
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass());
+
+ addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+
+ if (RerollLoops)
+ MPM.add(createLoopRerollPass());
+ if (!RunSLPAfterLoopVectorization) {
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
+ }
+
+ if (LoadCombine)
+ MPM.add(createLoadCombinePass());
+
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+ addExtensionsToPM(EP_Peephole, MPM);
+
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager that we are specifically trying to avoid. To prevent this
+ // we must insert a no-op module pass to reset the pass manager.
+ MPM.add(createBarrierNoopPass());
+
+ if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
+ // Remove avail extern fns and globals definitions if we aren't
+ // compiling an object file for later LTO. For LTO we want to preserve
+ // these so they are eligible for inlining at link-time. Note if they
+ // are unreferenced they will be removed by GlobalDCE later, so
+ // this only impacts referenced available externally globals.
+ // Eventually they will be suppressed during codegen, but eliminating
+ // here enables more opportunity for GlobalDCE as it may make
+ // globals referenced by available external functions dead
+ // and saves running remaining passes on the eliminated functions.
+ MPM.add(createEliminateAvailableExternallyPass());
+ }
+
+ if (EnableNonLTOGlobalsModRef)
+ // We add a fresh GlobalsModRef run at this point. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ //
+ // Note that this relies on a bug in the pass manager which preserves
+ // a module analysis into a function pass pipeline (and throughout it) so
+ // long as the first function pass doesn't invalidate the module analysis.
+ // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
+ // this to work. Fortunately, it is trivial to preserve AliasAnalysis
+ // (doing nothing preserves it as it is required to be conservatively
+ // correct in the face of IR changes).
+ MPM.add(createGlobalsAAWrapperPass());
+
+ if (RunFloat2Int)
+ MPM.add(createFloat2IntPass());
+
+ addExtensionsToPM(EP_VectorizerStart, MPM);
+
+  // Re-rotate loops in all our loop nests. These may have fallen out of
+  // rotated form due to GVN or other transformations, and the vectorizer
+  // relies on the rotated form. Disable header duplication at -Oz.
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+
+  // Distribute loops to allow partial vectorization: isolate the dependences
+  // that would otherwise inhibit vectorization into a separate loop.
+ if (EnableLoopDistribute)
+ MPM.add(createLoopDistributePass());
+
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ if (EnableLoopLoadElim)
+ MPM.add(createLoopLoadEliminationPass());
+
+ // FIXME: Because of #pragma vectorize enable, the passes below are always
+ // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+ // on -O1 and no #pragma is found). Would be good to have these two passes
+ // as function calls, so that we can only pass them when the vectorizer
+ // changed the code.
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ MPM.add(createEarlyCSEPass());
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createLICMPass());
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
+ }
+
+ if (RunSLPAfterLoopVectorization) {
+ if (SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
+ }
+ }
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
+ }
+
+ addExtensionsToPM(EP_Peephole, MPM);
+ MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
+
+ if (!DisableUnrollLoops) {
+ MPM.add(createLoopUnrollPass()); // Unroll small loops
+
+    // LoopUnroll may generate some redundancy; clean it up.
+ MPM.add(createInstructionCombiningPass());
+
+    // Runtime unrolling will introduce a runtime check in the loop prologue.
+    // If the unrolled loop is an inner loop, then the prologue will be inside
+    // the outer loop. The LICM pass can help to promote the runtime check out
+    // if the checked value is loop invariant.
+ MPM.add(createLICMPass());
+ }
+
+ // After vectorization and unrolling, assume intrinsics may tell us more
+ // about pointer alignments.
+ MPM.add(createAlignmentFromAssumptionsPass());
+
+ if (!DisableUnitAtATime) {
+ // FIXME: We shouldn't bother with this anymore.
+ MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+
+    // GlobalOpt already deletes dead functions and globals; at -O2, try a
+    // late pass of GlobalDCE. It is capable of deleting dead cycles.
+ if (OptLevel > 1) {
+ MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+ MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
+ }
+
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+
+ addExtensionsToPM(EP_OptimizerLast, MPM);
+}
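+
+// Illustrative driver sketch (assumptions: an existing Module M and the usual
+// legacy pass-manager headers; this shows how a frontend typically uses the
+// builder, and is not code from this file):
+//
+//   PassManagerBuilder PMB;
+//   PMB.OptLevel = 2;                            // roughly -O2
+//   PMB.Inliner = createFunctionInliningPass();  // enable inlining
+//   legacy::FunctionPassManager FPM(&M);
+//   legacy::PassManager MPM;
+//   PMB.populateFunctionPassManager(FPM);
+//   PMB.populateModulePassManager(MPM);
+//   FPM.doInitialization();
+//   for (Function &F : M)
+//     FPM.run(F);
+//   FPM.doFinalization();
+//   MPM.run(M);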
+
+void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
+ // Provide AliasAnalysis services for optimizations.
+ addInitialAliasAnalysisPasses(PM);
+
+ if (FunctionIndex)
+ PM.add(createFunctionImportPass(FunctionIndex));
+
+ // Allow forcing function attributes as a debugging and tuning aid.
+ PM.add(createForceFunctionAttrsLegacyPass());
+
+ // Infer attributes about declarations if possible.
+ PM.add(createInferFunctionAttrsLegacyPass());
+
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ PM.add(createIPSCCPPass());
+
+ // Now that we internalized some globals, see if we can hack on them!
+ PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
+ PM.add(createGlobalOptimizerPass());
+ // Promote any localized global vars.
+ PM.add(createPromoteMemoryToRegisterPass());
+
+ // Linking modules together can lead to duplicated global constants, only
+ // keep one copy of each constant.
+ PM.add(createConstantMergePass());
+
+ // Remove unused arguments from functions.
+ PM.add(createDeadArgEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
+
+ // Inline small functions
+ bool RunInliner = Inliner;
+ if (RunInliner) {
+ PM.add(Inliner);
+ Inliner = nullptr;
+ }
+
+ PM.add(createPruneEHPass()); // Remove dead EH info.
+
+ // Optimize globals again if we ran the inliner.
+ if (RunInliner)
+ PM.add(createGlobalOptimizerPass());
+ PM.add(createGlobalDCEPass()); // Remove dead functions.
+
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ PM.add(createArgumentPromotionPass());
+
+ // The IPO passes may leave cruft around. Clean up after them.
+ PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
+ PM.add(createJumpThreadingPass());
+
+ // Break up allocas
+ if (UseNewSROA)
+ PM.add(createSROAPass());
+ else
+ PM.add(createScalarReplAggregatesPass());
+
+  // Run a few AA driven optimizations here and now, to clean up the code.
+ PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
+
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ if (EnableMLSM)
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
+ PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+
+ // Nuke dead stores.
+ PM.add(createDeadStoreEliminationPass());
+
+ // More loops are countable; try to optimize them.
+ PM.add(createIndVarSimplifyPass());
+ PM.add(createLoopDeletionPass());
+ if (EnableLoopInterchange)
+ PM.add(createLoopInterchangePass());
+
+ PM.add(createLoopVectorizePass(true, LoopVectorize));
+
+ // Now that we've optimized loops (in particular loop induction variables),
+ // we may have exposed more scalar opportunities. Run parts of the scalar
+ // optimizer again at this point.
+ PM.add(createInstructionCombiningPass()); // Initial cleanup
+ PM.add(createCFGSimplificationPass()); // if-convert
+ PM.add(createSCCPPass()); // Propagate exposed constants
+ PM.add(createInstructionCombiningPass()); // Clean up again
+ PM.add(createBitTrackingDCEPass());
+
+ // More scalar chains could be vectorized due to more alias information
+ if (RunSLPAfterLoopVectorization)
+ if (SLPVectorize)
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ // After vectorization, assume intrinsics may tell us more about pointer
+ // alignments.
+ PM.add(createAlignmentFromAssumptionsPass());
+
+ if (LoadCombine)
+ PM.add(createLoadCombinePass());
+
+ // Cleanup and simplify the code after the scalar optimizations.
+ PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
+
+ PM.add(createJumpThreadingPass());
+}
+
+void PassManagerBuilder::addLateLTOOptimizationPasses(
+ legacy::PassManagerBase &PM) {
+ // Delete basic blocks, which optimization passes may have killed.
+ PM.add(createCFGSimplificationPass());
+
+ // Drop bodies of available externally objects to improve GlobalDCE.
+ PM.add(createEliminateAvailableExternallyPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ PM.add(createGlobalDCEPass());
+
+ // FIXME: this is profitable (for compiler time) to do at -O0 too, but
+ // currently it damages debug info.
+ if (MergeFunctions)
+ PM.add(createMergeFunctionsPass());
+}
+
+void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
+ if (LibraryInfo)
+ PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+ if (VerifyInput)
+ PM.add(createVerifierPass());
+
+ if (OptLevel > 1)
+ addLTOOptimizationPasses(PM);
+
+ // Create a function that performs CFI checks for cross-DSO calls with targets
+ // in the current module.
+ PM.add(createCrossDSOCFIPass());
+
+ // Lower bit sets to globals. This pass supports Clang's control flow
+ // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
+ // is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerBitSetsPass());
+
+ if (OptLevel != 0)
+ addLateLTOOptimizationPasses(PM);
+
+ if (VerifyOutput)
+ PM.add(createVerifierPass());
+}
+
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+ return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+ return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
+ PassManagerBuilder *PMB = new PassManagerBuilder();
+ return wrap(PMB);
+}
+
+void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ delete Builder;
+}
+
+void
+LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned OptLevel) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->OptLevel = OptLevel;
+}
+
+void
+LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned SizeLevel) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->SizeLevel = SizeLevel;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnitAtATime = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnrollLoops = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) {
+ // NOTE: The simplify-libcalls pass has been removed.
+}
+
+void
+LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
+ unsigned Threshold) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->Inliner = createFunctionInliningPass(Threshold);
+}
+
+void
+LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
+ Builder->populateFunctionPassManager(*FPM);
+}
+
+void
+LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ legacy::PassManagerBase *MPM = unwrap(PM);
+ Builder->populateModulePassManager(*MPM);
+}
+
+void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM,
+ LLVMBool Internalize,
+ LLVMBool RunInliner) {
+ PassManagerBuilder *Builder = unwrap(PMB);
+ legacy::PassManagerBase *LPM = unwrap(PM);
+
+  // A small backwards compatibility hack. populateLTOPassManager used to take
+  // a RunInliner option.
+ if (RunInliner && !Builder->Inliner)
+ Builder->Inliner = createFunctionInliningPass();
+
+ Builder->populateLTOPassManager(*LPM);
+}
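+
+// Illustrative C API usage (a sketch assuming an existing LLVMPassManagerRef
+// PM wrapping a module pass manager; 225 is just an example threshold):
+//
+//   LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
+//   LLVMPassManagerBuilderSetOptLevel(PMB, 2);
+//   LLVMPassManagerBuilderUseInlinerWithThreshold(PMB, 225);
+//   LLVMPassManagerBuilderPopulateModulePassManager(PMB, PM);
+//   LLVMPassManagerBuilderDispose(PMB);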
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 0000000..3af4afb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,273 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, turning invoke instructions into calls, iff the callee cannot
+// throw an exception, and marking functions 'nounwind' if they cannot throw.
+// It implements this as a bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "prune-eh"
+
+STATISTIC(NumRemoved, "Number of invokes removed");
+STATISTIC(NumUnreach, "Number of noreturn calls optimized");
+
+namespace {
+ struct PruneEH : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
+
+ // runOnSCC - Analyze the SCC, performing the transformation if possible.
+ bool runOnSCC(CallGraphSCC &SCC) override;
+
+ bool SimplifyFunction(Function *F);
+ void DeleteBasicBlock(BasicBlock *BB);
+ };
+}
+
+char PruneEH::ID = 0;
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+
+Pass *llvm::createPruneEHPass() { return new PruneEH(); }
+
+
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+ SmallPtrSet<CallGraphNode *, 8> SCCNodes;
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ bool MadeChange = false;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ SCCNodes.insert(*I);
+
+ // First pass, scan all of the functions in the SCC, simplifying them
+ // according to what we know.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+
+ // Next, check to see if any callees might throw or if there are any external
+ // functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+  // overridden at link time with something that throws, so assume that.
+ // If this SCC includes the unwind instruction, we KNOW it throws, so
+ // obviously the SCC might throw.
+ //
+ bool SCCMightUnwind = false, SCCMightReturn = false;
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+ (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (!F) {
+ SCCMightUnwind = true;
+ SCCMightReturn = true;
+ } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ SCCMightUnwind |= !F->doesNotThrow();
+ SCCMightReturn |= !F->doesNotReturn();
+ } else {
+ bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+ bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+ // Determine if we should scan for InlineAsm in a naked function as it
+ // is the only way to return without a ReturnInst. Only do this for
+ // no-inline functions as functions which may be inlined cannot
+ // meaningfully return via assembly.
+ bool CheckReturnViaAsm = CheckReturn &&
+ F->hasFnAttribute(Attribute::Naked) &&
+ F->hasFnAttribute(Attribute::NoInline);
+
+ if (!CheckUnwind && !CheckReturn)
+ continue;
+
+ for (const BasicBlock &BB : *F) {
+ const TerminatorInst *TI = BB.getTerminator();
+ if (CheckUnwind && TI->mayThrow()) {
+ SCCMightUnwind = true;
+ } else if (CheckReturn && isa<ReturnInst>(TI)) {
+ SCCMightReturn = true;
+ }
+
+ for (const Instruction &I : BB) {
+ if ((!CheckUnwind || SCCMightUnwind) &&
+ (!CheckReturnViaAsm || SCCMightReturn))
+ break;
+
+ // Check to see if this function performs an unwind or calls an
+ // unwinding function.
+ if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) {
+ bool InstMightUnwind = true;
+ if (const auto *CI = dyn_cast<CallInst>(&I)) {
+ if (Function *Callee = CI->getCalledFunction()) {
+ CallGraphNode *CalleeNode = CG[Callee];
+ // If the callee is outside our current SCC then we may throw
+ // because it might. If it is inside, do nothing.
+ if (SCCNodes.count(CalleeNode) > 0)
+ InstMightUnwind = false;
+ }
+ }
+ SCCMightUnwind |= InstMightUnwind;
+ }
+ if (CheckReturnViaAsm && !SCCMightReturn)
+ if (auto ICS = ImmutableCallSite(&I))
+ if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
+ if (IA->hasSideEffects())
+ SCCMightReturn = true;
+ }
+
+ if (SCCMightUnwind && SCCMightReturn)
+ break;
+ }
+ }
+ }
+
+  // If the SCC doesn't unwind or doesn't return, note this fact.
+ if (!SCCMightUnwind || !SCCMightReturn)
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
+ F->addFnAttr(Attribute::NoUnwind);
+ MadeChange = true;
+ }
+
+ if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) {
+ F->addFnAttr(Attribute::NoReturn);
+ MadeChange = true;
+ }
+ }
+
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    // Convert any invoke instructions of non-throwing functions in this node
+    // into call instructions with a branch. This makes the exception blocks
+    // dead.
+ if (Function *F = (*I)->getFunction())
+ MadeChange |= SimplifyFunction(F);
+ }
+
+ return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls to
+// no-return functions.
+bool PruneEH::SimplifyFunction(Function *F) {
+ bool MadeChange = false;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
+ SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+ "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction
+ // now uses the value produced by the call instruction. Note that we
+ // do this even for void functions and calls with no uses so that the
+ // callgraph edge is updated.
+ II->replaceAllUsesWith(Call);
+ BasicBlock *UnwindBlock = II->getUnwindDest();
+ UnwindBlock->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the
+ // invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ BB->getInstList().pop_back();
+
+ // If the unwind block is now dead, nuke it.
+ if (pred_empty(UnwindBlock))
+ DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+
+ ++NumRemoved;
+ MadeChange = true;
+ }
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ // This call calls a function that cannot return. Insert an
+ // unreachable instruction after it and simplify the code. Do this
+ // by splitting the BB, adding the unreachable, then deleting the
+ // new BB.
+ BasicBlock *New = BB->splitBasicBlock(I);
+
+ // Remove the uncond branch and add an unreachable.
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), &*BB);
+
+ DeleteBasicBlock(New); // Delete the new BB.
+ MadeChange = true;
+ ++NumUnreach;
+ break;
+ }
+ }
+
+ return MadeChange;
+}
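+
+// Illustrative IR effect of the rewrite above (hypothetical callee name):
+// given a callee known not to unwind,
+//
+//   invoke void @nothrow_fn() to label %cont unwind label %lpad
+//
+// becomes
+//
+//   call void @nothrow_fn()
+//   br label %cont
+//
+// after which %lpad may lose all predecessors and is deleted.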
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+ assert(pred_empty(BB) && "BB is not dead!");
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+ CallGraphNode *CGN = CG[BB->getParent()];
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+ --I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (!isa<IntrinsicInst>(I))
+ CGN->removeCallEdgeFor(CI);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ CGN->removeCallEdgeFor(II);
+ if (!I->use_empty())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+
+ // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+ Succs[i]->removePredecessor(BB);
+
+ BB->eraseFromParent();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
new file mode 100644
index 0000000..928d92e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -0,0 +1,1265 @@
+//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader transformation. This pass
+// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
+// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
+// profile information in the given profile.
+//
+// This pass generates branch weight annotations on the IR:
+//
+// - prof: Represents branch weights. This annotation is added to branches
+//         to indicate the weight of each edge coming out of the branch.
+//         The weight of each edge is the weight of the target block for
+//         that edge. The weight of a block B is computed as the maximum
+//         number of samples found on any instruction in B.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <cctype>
+
+using namespace llvm;
+using namespace sampleprof;
+
+#define DEBUG_TYPE "sample-profile"
+
+// Command line option to specify the file to read samples from. This is
+// mainly used for debugging.
+static cl::opt<std::string> SampleProfileFile(
+ "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+ "sample-profile-max-propagate-iterations", cl::init(100),
+ cl::desc("Maximum number of iterations to go through when propagating "
+ "sample block/edge weights through the CFG."));
+static cl::opt<unsigned> SampleProfileRecordCoverage(
+ "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of records in the input profile "
+ "are matched to the IR."));
+static cl::opt<unsigned> SampleProfileSampleCoverage(
+ "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of samples in the input profile "
+ "are matched to the IR."));
+static cl::opt<double> SampleProfileHotThreshold(
+ "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
+ cl::desc("Inlined functions that account for more than N% of all samples "
+    cl::desc("Inlined functions that account for more than N% of all samples "
+             "collected in the parent function will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+ "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for more than N% of all samples "
+    cl::desc("Top-level functions that account for more than N% of all samples "
+             "collected in the profile will be marked as hot for the inliner "
+ "to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+ "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+ cl::desc("Top-level functions that account for less than N% of all samples "
+    cl::desc("Top-level functions that account for less than N% of all samples "
+             "collected in the profile will be marked as cold for the inliner "
+ "to consider."));
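+
+// A plausible invocation using the flags declared above (a sketch; file names
+// are hypothetical):
+//
+//   opt -sample-profile -sample-profile-file=foo.prof \
+//       -sample-profile-check-record-coverage=90 foo.bc -o foo.opt.bc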
+
+namespace {
+typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
+typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
+typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
+typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
+typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
+ BlockEdgeMap;
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
+class SampleProfileLoader : public ModulePass {
+public:
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoader(StringRef Name = SampleProfileFile)
+ : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
+ Samples(nullptr), Filename(Name), ProfileIsValid(false),
+ TotalCollectedSamples(0) {
+ initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+
+ void dump() { Reader->dump(); }
+
+ const char *getPassName() const override { return "Sample profile pass"; }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+protected:
+ bool runOnFunction(Function &F);
+ unsigned getFunctionLoc(Function &F);
+ bool emitAnnotations(Function &F);
+ ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
+ ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
+ const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
+ const FunctionSamples *findFunctionSamples(const Instruction &I) const;
+ bool inlineHotFunctions(Function &F);
+ bool emitInlineHints(Function &F);
+ void printEdgeWeight(raw_ostream &OS, Edge E);
+ void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
+ void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
+ bool computeBlockWeights(Function &F);
+ void findEquivalenceClasses(Function &F);
+ void findEquivalencesFor(BasicBlock *BB1,
+ SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree);
+ void propagateWeights(Function &F);
+ uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
+ void buildEdges(Function &F);
+ bool propagateThroughEdges(Function &F);
+ void computeDominanceAndLoopInfo(Function &F);
+ unsigned getOffset(unsigned L, unsigned H) const;
+ void clearFunctionData();
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+
+ /// \brief Map edges to their computed weights.
+ ///
+ /// Edge weights are computed by propagating basic block weights in
+ /// SampleProfile::propagateWeights.
+ EdgeWeightMap EdgeWeights;
+
+ /// \brief Set of visited blocks during propagation.
+ SmallPtrSet<const BasicBlock *, 128> VisitedBlocks;
+
+ /// \brief Set of visited edges during propagation.
+ SmallSet<Edge, 128> VisitedEdges;
+
+ /// \brief Equivalence classes for block weights.
+ ///
+ /// Two blocks BB1 and BB2 are in the same equivalence class if they
+ /// dominate and post-dominate each other, and they are in the same loop
+ /// nest. When this happens, the two blocks are guaranteed to execute
+ /// the same number of times.
+ EquivalenceClassMap EquivalenceClass;
+
+ /// \brief Dominance, post-dominance and loop information.
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+ std::unique_ptr<LoopInfo> LI;
+
+ /// \brief Predecessors for each basic block in the CFG.
+ BlockEdgeMap Predecessors;
+
+ /// \brief Successors for each basic block in the CFG.
+ BlockEdgeMap Successors;
+
+ /// \brief Profile reader object.
+ std::unique_ptr<SampleProfileReader> Reader;
+
+ /// \brief Samples collected for the body of this function.
+ FunctionSamples *Samples;
+
+ /// \brief Name of the profile file to load.
+ StringRef Filename;
+
+ /// \brief Flag indicating whether the profile input loaded successfully.
+ bool ProfileIsValid;
+
+ /// \brief Total number of samples collected in this profile.
+ ///
+ /// This is the sum of all the samples collected in all the functions executed
+ /// at runtime.
+ uint64_t TotalCollectedSamples;
+};
+
+class SampleCoverageTracker {
+public:
+ SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+ bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+ uint32_t Discriminator, uint64_t Samples);
+ unsigned computeCoverage(unsigned Used, unsigned Total) const;
+ unsigned countUsedRecords(const FunctionSamples *FS) const;
+ unsigned countBodyRecords(const FunctionSamples *FS) const;
+ uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+ uint64_t countBodySamples(const FunctionSamples *FS) const;
+ void clear() {
+ SampleCoverage.clear();
+ TotalUsedSamples = 0;
+ }
+
+private:
+ typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+ typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+ FunctionSamplesCoverageMap;
+
+ /// Coverage map for sampling records.
+ ///
+ /// This map keeps a record of sampling records that have been matched to
+ /// an IR instruction. This is used to detect some form of staleness in
+  /// profiles (see the -sample-profile-check-record-coverage and
+  /// -sample-profile-check-sample-coverage flags).
+ ///
+ /// Each entry in the map corresponds to a FunctionSamples instance. This is
+ /// another map that counts how many times the sample record at the
+ /// given location has been used.
+ FunctionSamplesCoverageMap SampleCoverage;
+
+ /// Number of samples used from the profile.
+ ///
+ /// When a sampling record is used for the first time, the samples from
+ /// that record are added to this accumulator. Coverage is later computed
+ /// based on the total number of samples available in this function and
+ /// its callsites.
+ ///
+ /// Note that this accumulator tracks samples used from a single function
+ /// and all the inlined callsites. Strictly, we should have a map of counters
+ /// keyed by FunctionSamples pointers, but these stats are cleared after
+ /// every function, so we just need to keep a single counter.
+ uint64_t TotalUsedSamples;
+};
+
+SampleCoverageTracker CoverageTracker;
+
+/// Return true if the given callsite is hot with respect to its caller.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compute the fraction
+/// of samples used by the callsite with respect to the total number of samples
+/// collected in the caller.
+///
+/// If that fraction is larger than the default given by
+/// SampleProfileHotThreshold, the callsite will be inlined again.
+bool callsiteIsHot(const FunctionSamples *CallerFS,
+ const FunctionSamples *CallsiteFS) {
+ if (!CallsiteFS)
+ return false; // The callsite was not inlined in the original binary.
+
+ uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
+ if (ParentTotalSamples == 0)
+ return false; // Avoid division by zero.
+
+ uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+ if (CallsiteTotalSamples == 0)
+ return false; // Callsite is trivially cold.
+
+ double PercentSamples =
+ (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+ return PercentSamples >= SampleProfileHotThreshold;
+}
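+
+// Worked example (hypothetical numbers): if the caller accumulated 20,000
+// total samples and the inlined callsite accounts for 500 of them, the
+// callsite consumed 2.5% of the caller's samples; with the default threshold
+// of 0.1% it is therefore considered hot and eligible for re-inlining.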
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+ uint32_t LineOffset,
+ uint32_t Discriminator,
+ uint64_t Samples) {
+ LineLocation Loc(LineOffset, Discriminator);
+ unsigned &Count = SampleCoverage[FS][Loc];
+ bool FirstTime = (++Count == 1);
+ if (FirstTime)
+ TotalUsedSamples += Samples;
+ return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+ auto I = SampleCoverage.find(FS);
+
+ // The size of the coverage map for FS represents the number of records
+ // that were marked used at least once.
+ unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+ // If there are inlined callsites in this function, count the samples found
+ // in the respective bodies. However, do not bother counting callees with 0
+ // total samples, these are callees that were never invoked at runtime.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+ unsigned Count = FS->getBodySamples().size();
+
+ // Only count records in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
+
+ return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+ uint64_t Total = 0;
+ for (const auto &I : FS->getBodySamples())
+ Total += I.second.getSamples();
+
+ // Only count samples in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples()) {
+ const FunctionSamples *CalleeSamples = &I.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
+
+ return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+ unsigned Total) const {
+ assert(Used <= Total &&
+ "number of used records cannot exceed the total number of records");
+ return Total > 0 ? Used * 100 / Total : 100;
+}
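+
+// For example (hypothetical counts): with Used = 45 matched records out of
+// Total = 50, computeCoverage(45, 50) returns 90, i.e. 90% coverage.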
+
+/// Clear all the per-function data used to load samples and propagate weights.
+void SampleProfileLoader::clearFunctionData() {
+ BlockWeights.clear();
+ EdgeWeights.clear();
+ VisitedBlocks.clear();
+ VisitedEdges.clear();
+ EquivalenceClass.clear();
+ DT = nullptr;
+ PDT = nullptr;
+ LI = nullptr;
+ Predecessors.clear();
+ Successors.clear();
+ CoverageTracker.clear();
+}
+
+/// \brief Returns the offset of lineno \p L relative to head_lineno \p H.
+///
+/// \param L Lineno
+/// \param H Header lineno of the function
+///
+/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// We assume that a single function will not exceed 65535 LOC.
+unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
+ return (L - H) & 0xffff;
+}
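+
+// Example (hypothetical line numbers): for a function whose header is at
+// line 100, an instruction at line 107 yields getOffset(107, 100) == 7.
+// Because of the 16-bit mask, offsets wrap modulo 65536 for functions that
+// exceed the assumed 65535-line limit.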
+
+/// \brief Print the weight of edge \p E on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param E Edge to print.
+void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
+ OS << "weight[" << E.first->getName() << "->" << E.second->getName()
+ << "]: " << EdgeWeights[E] << "\n";
+}
+
+/// \brief Print the equivalence class of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
+ const BasicBlock *BB) {
+ const BasicBlock *Equiv = EquivalenceClass[BB];
+ OS << "equivalence[" << BB->getName()
+ << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
+}
+
+/// \brief Print the weight of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
+ const BasicBlock *BB) const {
+ const auto &I = BlockWeights.find(BB);
+ uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
+ OS << "weight[" << BB->getName() << "]: " << W << "\n";
+}
+
+/// \brief Get the weight for an instruction.
+///
+/// The "weight" of an instruction \p Inst is the number of samples
+/// collected on that instruction at runtime. To retrieve it, we
+/// need to compute the line number of \p Inst relative to the start of its
+/// function. We use HeaderLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using BodySamples.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the weight of \p Inst.
+ErrorOr<uint64_t>
+SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
+ DebugLoc DLoc = Inst.getDebugLoc();
+ if (!DLoc)
+ return std::error_code();
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (!FS)
+ return std::error_code();
+
+ const DILocation *DIL = DLoc;
+ unsigned Lineno = DLoc.getLine();
+ unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
+
+ uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
+ uint32_t Discriminator = DIL->getDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+ if (R) {
+ bool FirstMark =
+ CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
+ if (FirstMark) {
+ const Function *F = Inst.getParent()->getParent();
+ LLVMContext &Ctx = F->getContext();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, *F, DLoc,
+ Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
+ Twine(LineOffset) +
+ ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
+ }
+ DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
+ << Inst << " (line offset: " << Lineno - HeaderLineno << "."
+ << DIL->getDiscriminator() << " - weight: " << R.get()
+ << ")\n");
+ }
+ return R;
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p BB is the maximum weight of all the
+/// instructions in BB.
+///
+/// \param BB The basic block to query.
+///
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+ bool Found = false;
+ uint64_t Weight = 0;
+ for (auto &I : BB->getInstList()) {
+ const ErrorOr<uint64_t> &R = getInstWeight(I);
+ if (R && R.get() >= Weight) {
+ Weight = R.get();
+ Found = true;
+ }
+ }
+ if (Found)
+ return Weight;
+ else
+ return std::error_code();
+}
+
+/// \brief Compute and store the weights of every basic block.
+///
+/// This populates the BlockWeights map by computing
+/// the weights of every basic block in the CFG.
+///
+/// \param F The function to query.
+bool SampleProfileLoader::computeBlockWeights(Function &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Block weights\n");
+ for (const auto &BB : F) {
+ ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+ if (Weight) {
+ BlockWeights[&BB] = Weight.get();
+ VisitedBlocks.insert(&BB);
+ Changed = true;
+ }
+ DEBUG(printBlockWeight(dbgs(), &BB));
+ }
+
+ return Changed;
+}
+
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// instance that the call instruction calls into. It contains all the
+/// samples that reside in that inlined instance. We first find the
+/// inlined instance that the call instruction comes from, then we
+/// traverse its children to find the callsite with the matching
+/// location and callee function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return nullptr;
+ }
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+
+ Function *CalleeFunc = Inst.getCalledFunction();
+ if (!CalleeFunc) {
+ return nullptr;
+ }
+
+ StringRef CalleeName = CalleeFunc->getName();
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return nullptr;
+
+ return FS->findFunctionSamplesAt(
+ CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// that the instruction comes from. We traverse the inline stack of that
+/// instruction and match it with the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+ SmallVector<CallsiteLocation, 10> S;
+ const DILocation *DIL = Inst.getDebugLoc();
+ if (!DIL) {
+ return Samples;
+ }
+ StringRef CalleeName;
+ for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
+ DIL = DIL->getInlinedAt()) {
+ DISubprogram *SP = DIL->getScope()->getSubprogram();
+ if (!SP)
+ return nullptr;
+ if (!CalleeName.empty()) {
+ S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator(), CalleeName));
+ }
+ CalleeName = SP->getLinkageName();
+ }
+ if (S.size() == 0)
+ return Samples;
+ const FunctionSamples *FS = Samples;
+ for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
+ FS = FS->findFunctionSamplesAt(S[i]);
+ }
+ return FS;
+}
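+
+// Illustrative walk (hypothetical names): for an instruction whose debug
+// location says it was inlined from bar() into foo(), the loop above builds
+// S = [callsite of bar in foo], then starts from the current function's
+// samples (foo's) and applies findFunctionSamplesAt() once per inline frame,
+// outermost first, to reach the samples for the inlined body of bar.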
+
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
+/// inliner to consider the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
+/// to consider the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+ if (TotalCollectedSamples == 0)
+ return false;
+
+ uint64_t FunctionSamples = Samples->getTotalSamples();
+ double SamplesPercent =
+ (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
+
+ // If the function collected more samples than the hot threshold, mark
+ // it globally hot.
+ if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+ F.addFnAttr(llvm::Attribute::InlineHint);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied inline hint to globally hot function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ // If the function collected fewer samples than the cold threshold, mark
+ // it globally cold.
+ if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+ F.addFnAttr(llvm::Attribute::Cold);
+ std::string Msg;
+ raw_string_ostream S(Msg);
+ S << "Applied cold hint to globally cold function '" << F.getName()
+ << "' with " << format("%.2f", SamplesPercent)
+ << "% of samples (threshold: "
+ << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
+ S.flush();
+ emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Iteratively inline hot callsites of a function.
+///
+/// Iteratively traverse all callsites of the function \p F, checking
+/// whether the corresponding inlined instance exists and is hot in the
+/// profile. If it is hot enough, inline the callsite and add the callee's
+/// new callsites into the caller.
+///
+/// TODO: investigate the possibility of not invoking InlineFunction directly.
+///
+/// \param F Function on which to perform iterative inlining.
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+ bool Changed = false;
+ LLVMContext &Ctx = F.getContext();
+ while (true) {
+ bool LocalChanged = false;
+ SmallVector<CallInst *, 10> CIS;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+ CIS.push_back(CI);
+ }
+ }
+ for (auto CI : CIS) {
+ InlineFunctionInfo IFI;
+ Function *CalledFunction = CI->getCalledFunction();
+ DebugLoc DLoc = CI->getDebugLoc();
+ uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+ if (InlineFunction(CI, IFI)) {
+ LocalChanged = true;
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
+ Twine("inlined hot callee '") +
+ CalledFunction->getName() + "' with " +
+ Twine(NumSamples) + " samples into '" +
+ F.getName() + "'");
+ }
+ }
+ if (LocalChanged) {
+ Changed = true;
+ } else {
+ break;
+ }
+ }
+ return Changed;
+}
+
+/// \brief Find equivalence classes for the given block.
+///
+/// This finds all the blocks that are guaranteed to execute the same
+/// number of times as \p BB1. To do this, it traverses all the
+/// descendants of \p BB1 in the dominator or post-dominator tree.
+///
+/// A block BB2 will be in the same equivalence class as \p BB1 if
+/// the following holds:
+///
+/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
+/// is a descendant of \p BB1 in the dominator tree, then BB2 should
+/// dominate BB1 in the post-dominator tree.
+///
+/// 2- Both BB2 and \p BB1 must be in the same loop.
+///
+/// For every block BB2 that meets those two requirements, we set BB2's
+/// equivalence class to \p BB1.
+///
+/// \param BB1 Block to check.
+/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
+/// \param DomTree Opposite dominator tree. If \p Descendants is filled
+/// with blocks from \p BB1's dominator tree, then
+/// this is the post-dominator tree, and vice versa.
+void SampleProfileLoader::findEquivalencesFor(
+ BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree) {
+ const BasicBlock *EC = EquivalenceClass[BB1];
+ uint64_t Weight = BlockWeights[EC];
+ for (const auto *BB2 : Descendants) {
+ bool IsDomParent = DomTree->dominates(BB2, BB1);
+ bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
+ if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+ EquivalenceClass[BB2] = EC;
+
+ // If BB2 is heavier than BB1, make BB2 have the same weight
+ // as BB1.
+ //
+ // Note that we don't worry about the opposite situation here
+ // (when BB2 is lighter than BB1). We will deal with this
+ // during the propagation phase. Right now, we just want to
+ // make sure that BB1 has the largest weight of all the
+ // members of its equivalence set.
+ Weight = std::max(Weight, BlockWeights[BB2]);
+ }
+ }
+ BlockWeights[EC] = Weight;
+}
+
+/// \brief Find equivalence classes.
+///
+/// Since samples may be missing from blocks, we can fill in the gaps by setting
+/// the weights of all the blocks in the same equivalence class to the same
+/// weight. To compute the concept of equivalence, we use dominance and loop
+/// information. Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// \param F The function to query.
+void SampleProfileLoader::findEquivalenceClasses(Function &F) {
+ SmallVector<BasicBlock *, 8> DominatedBBs;
+ DEBUG(dbgs() << "\nBlock equivalence classes\n");
+ // Find equivalence sets based on dominance and post-dominance information.
+ for (auto &BB : F) {
+ BasicBlock *BB1 = &BB;
+
+ // Compute BB1's equivalence class once.
+ if (EquivalenceClass.count(BB1)) {
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ continue;
+ }
+
+ // By default, blocks are in their own equivalence class.
+ EquivalenceClass[BB1] = BB1;
+
+ // Traverse all the blocks dominated by BB1. We are looking for
+ // every basic block BB2 such that:
+ //
+ // 1- BB1 dominates BB2.
+ // 2- BB2 post-dominates BB1.
+ // 3- BB1 and BB2 are in the same loop nest.
+ //
+ // If all those conditions hold, it means that BB2 is executed
+ // as many times as BB1, so they are placed in the same equivalence
+ // class by making BB2's equivalence class be BB1.
+ DominatedBBs.clear();
+ DT->getDescendants(BB1, DominatedBBs);
+ findEquivalencesFor(BB1, DominatedBBs, PDT.get());
+
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ }
+
+ // Assign weights to equivalence classes.
+ //
+ // All the basic blocks in the same equivalence class will execute
+ // the same number of times. Since we know that the head block in
+ // each equivalence class has the largest weight, assign that weight
+ // to all the blocks in that equivalence class.
+ DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
+ for (auto &BI : F) {
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EquivBB = EquivalenceClass[BB];
+ if (BB != EquivBB)
+ BlockWeights[BB] = BlockWeights[EquivBB];
+ DEBUG(printBlockWeight(dbgs(), BB));
+ }
+}
+
+/// \brief Visit the given edge to decide if it has a valid weight.
+///
+/// If \p E has not been visited before, we copy it to \p UnknownEdge
+/// and increment the count of unknown edges.
+///
+/// \param E Edge to visit.
+/// \param NumUnknownEdges Current number of unknown edges.
+/// \param UnknownEdge Set if E has not been visited before.
+///
+/// \returns E's weight, if known. Otherwise, return 0.
+uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+ Edge *UnknownEdge) {
+ if (!VisitedEdges.count(E)) {
+ (*NumUnknownEdges)++;
+ *UnknownEdge = E;
+ return 0;
+ }
+
+ return EdgeWeights[E];
+}
+
+/// \brief Propagate weights through incoming/outgoing edges.
+///
+/// If the weight of a basic block is known, and there is only one edge
+/// with an unknown weight, we can calculate the weight of that edge.
+///
+/// Similarly, if all the edges have a known count, we can calculate the
+/// count of the basic block, if needed.
+///
+/// \param F Function to process.
+///
+/// \returns True if new weights were assigned to edges or blocks.
+bool SampleProfileLoader::propagateThroughEdges(Function &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "\nPropagation through edges\n");
+ for (const auto &BI : F) {
+ const BasicBlock *BB = &BI;
+ const BasicBlock *EC = EquivalenceClass[BB];
+
+ // Visit all the predecessor and successor edges to determine
+ // which ones have a weight assigned already. Note that it doesn't
+ // matter that we only keep track of a single unknown edge. The
+ // only case we are interested in handling is when only a single
+ // edge is unknown (see setEdgeOrBlockWeight).
+ for (unsigned i = 0; i < 2; i++) {
+ uint64_t TotalWeight = 0;
+ unsigned NumUnknownEdges = 0;
+ Edge UnknownEdge, SelfReferentialEdge;
+
+ if (i == 0) {
+ // First, visit all predecessor edges.
+ for (auto *Pred : Predecessors[BB]) {
+ Edge E = std::make_pair(Pred, BB);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ if (E.first == E.second)
+ SelfReferentialEdge = E;
+ }
+ } else {
+ // On the second round, visit all successor edges.
+ for (auto *Succ : Successors[BB]) {
+ Edge E = std::make_pair(BB, Succ);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ }
+ }
+
+ // After visiting all the edges, there are three cases that we
+ // can handle immediately:
+ //
+ // - All the edge weights are known (i.e., NumUnknownEdges == 0).
+ // In this case, we simply check that the sum of all the edges
+ // is the same as BB's weight. If not, we change BB's weight
+ // to match. Additionally, if BB had not been visited before,
+ // we mark it visited.
+ //
+      //      - Only one edge is unknown and BB has already been visited.
+      //        In this case, we can compute the weight of the edge by
+      //        subtracting the sum of the known edge weights from the
+      //        weight of the block. If the known edges weigh more than
+      //        BB, the weight of the last remaining edge is set to zero.
+ //
+ // - There exists a self-referential edge and the weight of BB is
+ // known. In this case, this edge can be based on BB's weight.
+ // We add up all the other known edges and set the weight on
+ // the self-referential edge as we did in the previous case.
+ //
+ // In any other case, we must continue iterating. Eventually,
+ // all edges will get a weight, or iteration will stop when
+ // it reaches SampleProfileMaxPropagateIterations.
+ if (NumUnknownEdges <= 1) {
+ uint64_t &BBWeight = BlockWeights[EC];
+ if (NumUnknownEdges == 0) {
+ // If we already know the weight of all edges, the weight of the
+ // basic block can be computed. It should be no larger than the sum
+ // of all edge weights.
+ if (TotalWeight > BBWeight) {
+ BBWeight = TotalWeight;
+ Changed = true;
+ DEBUG(dbgs() << "All edge weights for " << BB->getName()
+ << " known. Set weight for block: ";
+ printBlockWeight(dbgs(), BB););
+ }
+ if (VisitedBlocks.insert(EC).second)
+ Changed = true;
+ } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
+ // If there is a single unknown edge and the block has been
+ // visited, then we can compute E's weight.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[UnknownEdge] = 0;
+ VisitedEdges.insert(UnknownEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set weight for edge: ";
+ printEdgeWeight(dbgs(), UnknownEdge));
+ }
+ } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+ uint64_t &BBWeight = BlockWeights[BB];
+ // We have a self-referential edge and the weight of BB is known.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[SelfReferentialEdge] = 0;
+ VisitedEdges.insert(SelfReferentialEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set self-referential edge weight to: ";
+ printEdgeWeight(dbgs(), SelfReferentialEdge));
+ }
+ }
+ }
+
+ return Changed;
+}
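+
+// Worked example (hypothetical weights): suppose BB's equivalence class has
+// weight 100 and has been visited, and BB's successor edges are E1 (known,
+// weight 60) and E2 (unknown). The successor round computes TotalWeight = 60
+// with NumUnknownEdges == 1, so E2 is assigned 100 - 60 = 40 and marked
+// visited.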
+
+/// \brief Build in/out edge lists for each basic block in the CFG.
+///
+/// We are interested in unique edges. If a block B1 has multiple
+/// edges to another block B2, we only add a single B1->B2 edge.
+void SampleProfileLoader::buildEdges(Function &F) {
+ for (auto &BI : F) {
+ BasicBlock *B1 = &BI;
+
+ // Add predecessors for B1.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ if (!Predecessors[B1].empty())
+ llvm_unreachable("Found a stale predecessors list in a basic block.");
+ for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
+ BasicBlock *B2 = *PI;
+ if (Visited.insert(B2).second)
+ Predecessors[B1].push_back(B2);
+ }
+
+ // Add successors for B1.
+ Visited.clear();
+ if (!Successors[B1].empty())
+ llvm_unreachable("Found a stale successors list in a basic block.");
+ for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
+ BasicBlock *B2 = *SI;
+ if (Visited.insert(B2).second)
+ Successors[B1].push_back(B2);
+ }
+ }
+}
+
+/// \brief Propagate weights into edges
+///
+/// The following rules are applied to every block BB in the CFG:
+///
+/// - If BB has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all incoming or outgoing edges are known except one, and the
+/// weight of the block is already known, the weight of the unknown
+/// edge will be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than BB's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+void SampleProfileLoader::propagateWeights(Function &F) {
+ bool Changed = true;
+ unsigned I = 0;
+
+ // Add an entry count to the function using the samples gathered
+ // at the function entry.
+ F.setEntryCount(Samples->getHeadSamples());
+
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
+
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F);
+ }
+
+ // Generate MD_prof metadata for every branch instruction using the
+ // edge weights computed during propagation.
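+  // Each annotated terminator ends up with !prof metadata of the usual
+  // branch_weights form, e.g. !{!"branch_weights", i32 128, i32 4}
+  // (illustrative values).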
+ DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
+ LLVMContext &Ctx = F.getContext();
+ MDBuilder MDB(Ctx);
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+
+ DEBUG(dbgs() << "\nGetting weights for branch at line "
+ << TI->getDebugLoc().getLine() << ".\n");
+ SmallVector<uint32_t, 4> Weights;
+ uint32_t MaxWeight = 0;
+ DebugLoc MaxDestLoc;
+ for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ Edge E = std::make_pair(BB, Succ);
+ uint64_t Weight = EdgeWeights[E];
+ DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
+ // Use uint32_t saturated arithmetic to adjust the incoming weights,
+ // if needed. Sample counts in profiles are 64-bit unsigned values,
+ // but internally branch weights are expressed as 32-bit values.
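+      // For example, a 64-bit sample count of 2^40 is emitted as
+      // 4294967295 (UINT32_MAX) in the metadata.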
+ if (Weight > std::numeric_limits<uint32_t>::max()) {
+ DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+ Weight = std::numeric_limits<uint32_t>::max();
+ }
+ Weights.push_back(static_cast<uint32_t>(Weight));
+ if (Weight != 0) {
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+ }
+ }
+ }
+
+ // Only set weights if there is at least one non-zero weight.
+ // In any other case, let the analyzer set weights.
+ if (MaxWeight > 0) {
+ DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ DebugLoc BranchLoc = TI->getDebugLoc();
+ emitOptimizationRemark(
+ Ctx, DEBUG_TYPE, F, MaxDestLoc,
+ Twine("most popular destination for conditional branches at ") +
+ ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
+ Twine(BranchLoc.getLine()) + ":" +
+ Twine(BranchLoc.getCol()))
+ : Twine("<UNKNOWN LOCATION>")));
+ } else {
+ DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ }
+ }
+}
+
+/// \brief Get the line number for the function header.
+///
+/// This looks up function \p F in the current compilation unit and
+/// retrieves the line number where the function is defined. This is
+/// line 0 for all the samples read from the profile file. Every line
+/// number is relative to this line.
+///
+/// \param F Function object to query.
+///
+/// \returns the line number where \p F is defined. If it returns 0,
+/// it means that there is no debug information available for \p F.
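+///
+/// For example, if the function header is at line 280 and a profile
+/// record carries offset 2, that record describes source line 282.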
+unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
+ if (DISubprogram *S = getDISubprogram(&F))
+ return S->getLine();
+
+ // If the start of \p F is missing, emit a diagnostic to inform the user
+ // about the missed opportunity.
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ "No debug information found in function " + F.getName() +
+ ": Function profile not used",
+ DS_Warning));
+ return 0;
+}
+
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+ DT.reset(new DominatorTree);
+ DT->recalculate(F);
+
+ PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+ PDT->recalculate(F);
+
+ LI.reset(new LoopInfo);
+ LI->analyze(*DT);
+}
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// Branch weights are computed out of instruction samples using a
+/// propagation heuristic. Propagation proceeds in 3 phases:
+///
+/// 1- Assignment of block weights. All the basic blocks in the function
+///    are initially assigned the same weight as their most frequently
+/// executed instruction.
+///
+/// 2- Creation of equivalence classes. Since samples may be missing from
+/// blocks, we can fill in the gaps by setting the weights of all the
+///    blocks in the same equivalence class to the same weight. To determine
+///    equivalence, we use dominance and loop information.
+/// Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
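+///    For instance, in an if-then-else diamond the entry block and the
+///    join block are equivalent: the entry dominates the join, the join
+///    post-dominates the entry, and both lie in the same loop (or both
+///    in none).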
+///
+/// 3- Propagation of block weights into edges. This uses a simple
+/// propagation heuristic. The following rules are applied to every
+/// block BB in the CFG:
+///
+/// - If BB has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all the edges are known except one, and the weight of the
+/// block is already known, the weight of the unknown edge will
+/// be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than BB's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+///
+/// Since this propagation is not guaranteed to converge for every CFG, we
+/// only allow it to proceed for a limited number of iterations (controlled
+/// by -sample-profile-max-propagate-iterations).
+///
+/// FIXME: Try to replace this propagation heuristic with a scheme
+/// that is guaranteed to converge. A work-list approach similar to
+/// the standard value propagation algorithm used by SSA-CCP might
+/// work here.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on BB using the computed values for each of its branches.
+///
+/// \param F The function to query.
+///
+/// \returns true if \p F was modified. Returns false, otherwise.
+bool SampleProfileLoader::emitAnnotations(Function &F) {
+ bool Changed = false;
+
+ if (getFunctionLoc(F) == 0)
+ return false;
+
+ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
+ << ": " << getFunctionLoc(F) << "\n");
+
+ Changed |= emitInlineHints(F);
+
+ Changed |= inlineHotFunctions(F);
+
+ // Compute basic block weights.
+ Changed |= computeBlockWeights(F);
+
+ if (Changed) {
+ // Compute dominance and loop info needed for propagation.
+ computeDominanceAndLoopInfo(F);
+
+ // Find equivalence classes.
+ findEquivalenceClasses(F);
+
+ // Propagate weights to all edges.
+ propagateWeights(F);
+ }
+
+ // If coverage checking was requested, compute it now.
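+  // For example, if 8 of 10 body records were applied, coverage is 80%;
+  // any requested threshold above 80 triggers the warning below.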
+ if (SampleProfileRecordCoverage) {
+ unsigned Used = CoverageTracker.countUsedRecords(Samples);
+ unsigned Total = CoverageTracker.countBodyRecords(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileRecordCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile records (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
+
+ if (SampleProfileSampleCoverage) {
+ uint64_t Used = CoverageTracker.getTotalUsedSamples();
+ uint64_t Total = CoverageTracker.countBodySamples(Samples);
+ unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+ if (Coverage < SampleProfileSampleCoverage) {
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
+ Twine(Coverage) + "%) were applied",
+ DS_Warning));
+ }
+ }
+ return Changed;
+}
+
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
+INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+ auto &Ctx = M.getContext();
+ auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ return true;
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass() {
+ return new SampleProfileLoader(SampleProfileFile);
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ return new SampleProfileLoader(Name);
+}
+
+bool SampleProfileLoader::runOnModule(Module &M) {
+ if (!ProfileIsValid)
+ return false;
+
+ // Compute the total number of samples collected in this profile.
+ for (const auto &I : Reader->getProfiles())
+ TotalCollectedSamples += I.second.getTotalSamples();
+
+ bool retval = false;
+ for (auto &F : M)
+ if (!F.isDeclaration()) {
+ clearFunctionData();
+ retval |= runOnFunction(F);
+ }
+ return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ Samples = Reader->getSamplesFor(F);
+ if (!Samples->empty())
+ return emitAnnotations(F);
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 0000000..c94cc7c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,84 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
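+//
+// For example, a declaration such as 'declare i32 @rand()' that is never
+// referenced anywhere in the module is erased by this pass.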
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+static bool stripDeadPrototypes(Module &M) {
+ bool MadeChange = false;
+
+ // Erase dead function prototypes.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+ Function *F = &*I++;
+ // Function must be a prototype and unused.
+ if (F->isDeclaration() && F->use_empty()) {
+ F->eraseFromParent();
+ ++NumDeadPrototypes;
+ MadeChange = true;
+ }
+ }
+
+  // Erase dead global variable prototypes.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ) {
+    GlobalVariable *GV = &*I++;
+    // Global must be a declaration and unused.
+    if (GV->isDeclaration() && GV->use_empty()) {
+      GV->eraseFromParent();
+      // Record the change so callers invalidate analyses correctly.
+      MadeChange = true;
+    }
+  }
+
+ // Return an indication of whether we changed anything or not.
+ return MadeChange;
+}
+
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+ if (stripDeadPrototypes(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+class StripDeadPrototypesLegacyPass : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ StripDeadPrototypesLegacyPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override {
+ return stripDeadPrototypes(M);
+ }
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesLegacyPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false)
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+ return new StripDeadPrototypesLegacyPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
new file mode 100644
index 0000000..46f352f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -0,0 +1,363 @@
+//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The StripSymbols transformation implements code stripping. Specifically, it
+// can delete:
+//
+// * names for virtual registers
+// * symbols for internal globals and functions
+// * debug information
+//
+// Note that this transformation makes code much less readable, so it should
+// only be used in situations where the 'strip' utility would be used, such as
+// reducing code size or making it harder to reverse engineer code.
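+//
+// For example (illustrative invocation), running 'opt -strip in.bc -o out.bc'
+// exercises the StripSymbols pass registered below under the name "strip".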
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+namespace {
+ class StripSymbols : public ModulePass {
+ bool OnlyDebugInfo;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripSymbols(bool ODI = false)
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripNonDebugSymbols : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripNonDebugSymbols()
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDebugDeclare : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDebugDeclare()
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char StripSymbols::ID = 0;
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false)
+
+ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
+ return new StripSymbols(OnlyDebugInfo);
+}
+
+char StripNonDebugSymbols::ID = 0;
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false)
+
+ModulePass *llvm::createStripNonDebugSymbolsPass() {
+ return new StripNonDebugSymbols();
+}
+
+char StripDebugDeclare::ID = 0;
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false)
+
+ModulePass *llvm::createStripDebugDeclarePass() {
+ return new StripDebugDeclare();
+}
+
+char StripDeadDebugInfo::ID = 0;
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false)
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
+/// OnlyUsedBy - Return true if V is only used by Usr.
+static bool OnlyUsedBy(Value *V, Value *Usr) {
+ for (User *U : V->users())
+ if (U != Usr)
+ return false;
+
+ return true;
+}
+
+static void RemoveDeadConstant(Constant *C) {
+ assert(C->use_empty() && "Constant is not dead!");
+ SmallPtrSet<Constant*, 4> Operands;
+ for (Value *Op : C->operands())
+ if (OnlyUsedBy(Op, C))
+ Operands.insert(cast<Constant>(Op));
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals.
+ GV->eraseFromParent();
+ }
+ else if (!isa<Function>(C))
+ if (isa<CompositeType>(C->getType()))
+ C->destroyConstant();
+
+ // If the constant referenced anything, see if we can delete it as well.
+ for (Constant *O : Operands)
+ RemoveDeadConstant(O);
+}
+
+// Strip the symbol table of its names.
+//
+static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
+ for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
+ Value *V = VI->getValue();
+ ++VI;
+ if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
+ if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
+ // Set name to "", removing from symbol table!
+ V->setName("");
+ }
+ }
+}
+
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+ TypeFinder StructTypes;
+ StructTypes.run(M, false);
+
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isLiteral() || STy->getName().empty()) continue;
+
+ if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+ continue;
+
+ STy->setName("");
+ }
+}
+
+/// Find values that are marked as llvm.used.
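+/// A typical llvm.used array looks like (illustrative):
+///   @llvm.used = appending global [1 x i8*]
+///     [i8* bitcast (void ()* @keep_me to i8*)], section "llvm.metadata"
+/// Entries may be pointer casts, hence the stripPointerCasts() call below.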
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
+ if (!LLVMUsed) return;
+ UsedValues.insert(LLVMUsed);
+
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+/// StripSymbolNames - Strip symbol names.
+static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
+
+ SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
+ findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
+ findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
+ if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
+ I->setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ }
+
+ // Remove all names from types.
+ StripTypeNames(M, PreserveDbgInfo);
+
+ return true;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+ bool Changed = false;
+ Changed |= StripDebugInfo(M);
+ if (!OnlyDebugInfo)
+ Changed |= StripSymbolNames(M, false);
+ return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+ return StripSymbolNames(M, true);
+}
+
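+// StripDebugDeclare removes calls of the form (illustrative operands):
+//   call void @llvm.dbg.declare(metadata i32* %x, metadata !10, metadata !11)
+// and then deletes any constants that become dead as a result.
+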
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+ Function *Declare = M.getFunction("llvm.dbg.declare");
+ std::vector<Constant*> DeadConstants;
+
+ if (Declare) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->user_back());
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
+ assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+ CI->eraseFromParent();
+ if (Arg1->use_empty()) {
+ if (Constant *C = dyn_cast<Constant>(Arg1))
+ DeadConstants.push_back(C);
+ else
+ RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+ }
+ if (Arg2->use_empty())
+ if (Constant *C = dyn_cast<Constant>(Arg2))
+ DeadConstants.push_back(C);
+ }
+ Declare->eraseFromParent();
+ }
+
+ while (!DeadConstants.empty()) {
+ Constant *C = DeadConstants.back();
+ DeadConstants.pop_back();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasLocalLinkage())
+ RemoveDeadConstant(GV);
+ } else
+ RemoveDeadConstant(C);
+ }
+
+ return true;
+}
+
+/// Remove any debug info for global variables/functions in the given module
+/// whose corresponding global variable/function no longer exists (i.e., is
+/// null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. It is designed
+/// in such a way that debug info for a symbol is preserved even if the symbol
+/// is optimized away by the optimizer. This pass removes debug info for such
+/// dead symbols.
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+ LLVMContext &C = M.getContext();
+
+  // Find all debug info in M. This is actually overkill in terms of what we
+ // want to do, but we want to try and be as resilient as possible in the face
+ // of potential debug info changes by using the formal interfaces given to us
+ // as much as possible.
+ DebugInfoFinder F;
+ F.processModule(M);
+
+ // For each compile unit, find the live set of global variables/functions and
+ // replace the current list of potentially dead global variables/functions
+ // with the live list.
+ SmallVector<Metadata *, 64> LiveGlobalVariables;
+ SmallVector<Metadata *, 64> LiveSubprograms;
+ DenseSet<const MDNode *> VisitedSet;
+
+ std::set<DISubprogram *> LiveSPs;
+ for (Function &F : M) {
+ if (DISubprogram *SP = F.getSubprogram())
+ LiveSPs.insert(SP);
+ }
+
+ for (DICompileUnit *DIC : F.compile_units()) {
+ // Create our live subprogram list.
+ bool SubprogramChange = false;
+ for (DISubprogram *DISP : DIC->getSubprograms()) {
+ // Make sure we visit each subprogram only once.
+ if (!VisitedSet.insert(DISP).second)
+ continue;
+
+      // If DISP is the subprogram of a live function, then DISP is live.
+ if (LiveSPs.count(DISP))
+ LiveSubprograms.push_back(DISP);
+ else
+ SubprogramChange = true;
+ }
+
+ // Create our live global variable list.
+ bool GlobalVariableChange = false;
+ for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
+      // Make sure we visit each global variable only once.
+ if (!VisitedSet.insert(DIG).second)
+ continue;
+
+ // If the global variable referenced by DIG is not null, the global
+ // variable is live.
+ if (DIG->getVariable())
+ LiveGlobalVariables.push_back(DIG);
+ else
+ GlobalVariableChange = true;
+ }
+
+ // If we found dead subprograms or global variables, replace the current
+ // subprogram list/global variable list with our new live subprogram/global
+ // variable list.
+ if (SubprogramChange) {
+ DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
+ Changed = true;
+ }
+
+ if (GlobalVariableChange) {
+ DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
+ Changed = true;
+ }
+
+ // Reset lists for the next iteration.
+ LiveSubprograms.clear();
+ LiveGlobalVariables.clear();
+ }
+
+ return Changed;
+}