Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
28 files changed, 16633 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp new file mode 100644 index 0000000..0e05129 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -0,0 +1,1019 @@ +//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass promotes "by reference" arguments to be "by value" arguments. In +// practice, this means looking for internal functions that have pointer +// arguments. If it can prove, through the use of alias analysis, that an +// argument is *only* loaded, then it can pass the value into the function +// instead of the address of the value. This can cause recursive simplification +// of code and lead to the elimination of allocas (especially in C++ template +// code like the STL). +// +// This pass also handles aggregate arguments that are passed into a function, +// scalarizing them if the elements of the aggregate are only loaded. Note that +// by default it refuses to scalarize aggregates which would require passing in +// more than three operands to the function, because passing thousands of +// operands for a large array or structure is unprofitable! This limit can be +// configured or disabled, however. +// +// Note that this transformation could also be done for arguments that are only +// stored to (returning the value instead), but does not currently. This case +// would be best handled when and if LLVM begins supporting multiple return +// values from functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "argpromotion" + +STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); +STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); +STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); +STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated"); + +namespace { + /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
+  ///
+  struct ArgPromotion : public CallGraphSCCPass {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<AssumptionCacheTracker>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
+      CallGraphSCCPass::getAnalysisUsage(AU);
+    }
+
+    bool runOnSCC(CallGraphSCC &SCC) override;
+    static char ID; // Pass identification, replacement for typeid
+    explicit ArgPromotion(unsigned maxElements = 3)
+        : CallGraphSCCPass(ID), maxElements(maxElements) {
+      initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// A vector used to hold the indices of a single GEP instruction
+    typedef std::vector<uint64_t> IndicesVector;
+
+  private:
+    bool isDenselyPacked(Type *type, const DataLayout &DL);
+    bool canPaddingBeAccessed(Argument *Arg);
+    CallGraphNode *PromoteArguments(CallGraphNode *CGN);
+    bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
+                                 AAResults &AAR) const;
+    CallGraphNode *DoPromotion(Function *F,
+                               SmallPtrSetImpl<Argument*> &ArgsToPromote,
+                               SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
+
+    using llvm::Pass::doInitialization;
+    bool doInitialization(CallGraph &CG) override;
+    /// The maximum number of elements to expand, or 0 for unlimited.
+    unsigned maxElements;
+  };
+}
+
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+                      "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+                    "Promote 'by reference' arguments to scalars", false, false)
+
+Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
+  return new ArgPromotion(maxElements);
+}
+
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+  bool Changed = false, LocalChange;
+
+  do { // Iterate until we stop promoting from this SCC.
+    LocalChange = false;
+    // Attempt to promote arguments from all functions in this SCC.
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      if (CallGraphNode *CGN = PromoteArguments(*I)) {
+        LocalChange = true;
+        SCC.ReplaceNode(*I, CGN);
+      }
+    }
+    Changed |= LocalChange; // Remember that we changed something.
+  } while (LocalChange);
+
+  return Changed;
+}
+
+/// \brief Checks if a type could have padding bytes.
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
+
+  // There is no size information, so be conservative.
+  if (!type->isSized())
+    return false;
+
+  // If the alloc size is not equal to the storage size, then there are padding
+  // bytes. For example, x86_fp80 on x86-64 has size 80 but alloc size 128.
+  if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+    return false;
+
+  if (!isa<CompositeType>(type))
+    return true;
+
+  // For homogeneous sequential types, check for padding within members.
+  if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+    return isa<PointerType>(seqTy) ||
+           isDenselyPacked(seqTy->getElementType(), DL);
+
+  // Check for padding within and between elements of a struct.
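+  // For example (illustrative, not from the upstream source): on a typical
+  // 64-bit target, struct { char c; int i; } allocates 8 bytes but only 5
+  // carry data, so it is not densely packed, whereas struct { int a; int b; }
+  // is.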
+ StructType *StructTy = cast<StructType>(type); + const StructLayout *Layout = DL.getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { + Type *ElTy = StructTy->getElementType(i); + if (!isDenselyPacked(ElTy, DL)) + return false; + if (StartPos != Layout->getElementOffsetInBits(i)) + return false; + StartPos += DL.getTypeAllocSizeInBits(ElTy); + } + + return true; +} + +/// \brief Checks if the padding bytes of an argument could be accessed. +bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { + + assert(arg->hasByValAttr()); + + // Track all the pointers to the argument to make sure they are not captured. + SmallPtrSet<Value *, 16> PtrValues; + PtrValues.insert(arg); + + // Track all of the stores. + SmallVector<StoreInst *, 16> Stores; + + // Scan through the uses recursively to make sure the pointer is always used + // sanely. + SmallVector<Value *, 16> WorkList; + WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) { + if (PtrValues.insert(V).second) + WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); + } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { + Stores.push_back(Store); + } else if (!isa<LoadInst>(V)) { + return true; + } + } + +// Check to make sure the pointers aren't captured + for (StoreInst *Store : Stores) + if (PtrValues.count(Store->getValueOperand())) + return true; + + return false; +} + +/// PromoteArguments - This method checks the specified function to see if there +/// are any promotable arguments and if it is safe to promote the function (for +/// example, all callers are direct). If safe to promote some arguments, it +/// calls the DoPromotion method. +/// +CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { + Function *F = CGN->getFunction(); + + // Make sure that it is local to this module. + if (!F || !F->hasLocalLinkage()) return nullptr; + + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; + + // First check: see if there are any pointer arguments! If not, quick exit. + SmallVector<Argument*, 16> PointerArgs; + for (Argument &I : F->args()) + if (I.getType()->isPointerTy()) + PointerArgs.push_back(&I); + if (PointerArgs.empty()) return nullptr; + + // Second check: make sure that all callers are direct callers. We can't + // transform functions that have indirect callers. Also see if the function + // is self-recursive. + bool isSelfRecursive = false; + for (Use &U : F->uses()) { + CallSite CS(U.getUser()); + // Must be a direct call. + if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; + + if (CS.getInstruction()->getParent()->getParent() == F) + isSelfRecursive = true; + } + + const DataLayout &DL = F->getParent()->getDataLayout(); + + // We need to manually construct BasicAA directly in order to disable its use + // of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. 
+  AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+
+  // Check to see which arguments are promotable. If an argument is promotable,
+  // add it to ArgsToPromote.
+  SmallPtrSet<Argument*, 8> ArgsToPromote;
+  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
+  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
+    Argument *PtrArg = PointerArgs[i];
+    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+
+    // Replace sret attribute with noalias. This reduces register pressure by
+    // avoiding a register copy.
+    if (PtrArg->hasStructRetAttr()) {
+      unsigned ArgNo = PtrArg->getArgNo();
+      F->setAttributes(
+          F->getAttributes()
+              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      for (Use &U : F->uses()) {
+        CallSite CS(U.getUser());
+        CS.setAttributes(
+            CS.getAttributes()
+                .removeAttribute(F->getContext(), ArgNo + 1,
+                                 Attribute::StructRet)
+                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      }
+    }
+
+    // If this is a byval argument and the aggregate type is small, just pass
+    // the elements directly; this is always safe if the passed value is
+    // densely packed or if we can prove the padding bytes are never accessed.
+    // This does not apply to inalloca.
+    bool isSafeToPromote =
+        PtrArg->hasByValAttr() &&
+        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    if (isSafeToPromote) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+        if (maxElements > 0 && STy->getNumElements() > maxElements) {
+          DEBUG(dbgs() << "argpromotion not promoting argument '"
+                << PtrArg->getName() << "' because it would require adding more"
+                << " than " << maxElements << " arguments to the function.\n");
+          continue;
+        }
+
+        // If all the elements are single-value types, we can promote it.
+        bool AllSimple = true;
+        for (const auto *EltTy : STy->elements()) {
+          if (!EltTy->isSingleValueType()) {
+            AllSimple = false;
+            break;
+          }
+        }
+
+        // Safe to transform, don't even bother trying to "promote" it.
+        // Passing the elements as scalars will allow scalarrepl to hack on
+        // the new alloca we introduce.
+        if (AllSimple) {
+          ByValArgsToTransform.insert(PtrArg);
+          continue;
+        }
+      }
+    }
+
+    // If the argument is a recursive type and we're in a recursive
+    // function, we could end up infinitely peeling the function argument.
+    if (isSelfRecursive) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+        bool RecursiveType = false;
+        for (const auto *EltTy : STy->elements()) {
+          if (EltTy == PtrArg->getType()) {
+            RecursiveType = true;
+            break;
+          }
+        }
+        if (RecursiveType)
+          continue;
+      }
+    }
+
+    // Otherwise, see if we can promote the pointer to its value.
+    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
+      ArgsToPromote.insert(PtrArg);
+  }
+
+  // No promotable pointer arguments.
+  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+    return nullptr;
+
+  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+}
+
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
+/// all callers pass in a valid pointer for the specified function argument.
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+  Function *Callee = Arg->getParent();
+  const DataLayout &DL = Callee->getParent()->getDataLayout();
+
+  unsigned ArgNo = Arg->getArgNo();
+
+  // Look at all call sites of the function. At this point we know we only
+  // have direct callsites.
+  for (User *U : Callee->users()) {
+    CallSite CS(U);
+    assert(CS && "Should only have direct calls!");
+
+    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+      return false;
+  }
+  return true;
+}
+
+/// Returns true if Prefix is a prefix of Longer. That is, Longer has at least
+/// as many elements as Prefix, and each element of Prefix is equal to the
+/// corresponding element of Longer.
+///
+/// This means it also returns true when Prefix and Longer are equal!
+static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
+                     const ArgPromotion::IndicesVector &Longer) {
+  if (Prefix.size() > Longer.size())
+    return false;
+  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
+}
+
+
+/// Checks if Indices, or a prefix of Indices, is in Set.
+static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
+                     std::set<ArgPromotion::IndicesVector> &Set) {
+  std::set<ArgPromotion::IndicesVector>::iterator Low;
+  Low = Set.upper_bound(Indices);
+  if (Low != Set.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to Indices. This means
+  // it points to a prefix of Indices (possibly Indices itself), if such
+  // prefix exists.
+  //
+  // This load is safe if any prefix of its operands is safe to load.
+  return Low != Set.end() && IsPrefix(*Low, Indices);
+}
+
+/// Mark the given indices (ToMark) as safe in the given set of indices
+/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
+/// is already a prefix of ToMark in Safe, ToMark is implicitly marked safe
+/// already. Furthermore, any indices that ToMark is itself a prefix of are
+/// removed from Safe (since they are now implicitly safe because of ToMark).
+static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
+                            std::set<ArgPromotion::IndicesVector> &Safe) {
+  std::set<ArgPromotion::IndicesVector>::iterator Low;
+  Low = Safe.upper_bound(ToMark);
+  // Guard against the case where Safe is empty.
+  if (Low != Safe.begin())
+    Low--;
+  // Low is now the last element smaller than or equal to ToMark. This
+  // means it points to a prefix of ToMark (possibly ToMark itself), if
+  // such a prefix exists.
+  if (Low != Safe.end()) {
+    if (IsPrefix(*Low, ToMark))
+      // If there is already a prefix of these indices (or exactly these
+      // indices) marked as safe, don't bother adding these indices.
+      return;
+
+    // Increment Low, so we can use it as an "insert before" hint.
+    ++Low;
+  }
+  // Insert.
+  Low = Safe.insert(Low, ToMark);
+  ++Low;
+  // If ToMark is a prefix of longer index list(s), remove those.
+  std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+  while (Low != End && IsPrefix(ToMark, *Low)) {
+    std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+    ++Low;
+    Safe.erase(Remove);
+  }
+}
+
+/// isSafeToPromoteArgument - As you might guess from the name of this method,
+/// it checks to see if it is both safe and useful to promote the argument.
+/// This method limits promotion of aggregates to at most maxElements elements
+/// (three by default) in order to avoid exploding the number of arguments
+/// passed in.
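+///
+/// For example (illustrative IR, not from this file): a callee whose only use
+/// of its pointer argument is
+///   %v = load i32, i32* %p
+/// in the entry block can be rewritten to take the i32 value directly, with
+/// each caller performing the load instead.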
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, + bool isByValOrInAlloca, + AAResults &AAR) const { + typedef std::set<IndicesVector> GEPIndicesSet; + + // Quick exit for unused arguments + if (Arg->use_empty()) + return true; + + // We can only promote this argument if all of the uses are loads, or are GEP + // instructions (with constant indices) that are subsequently loaded. + // + // Promoting the argument causes it to be loaded in the caller + // unconditionally. This is only safe if we can prove that either the load + // would have happened in the callee anyway (ie, there is a load in the entry + // block) or the pointer passed in at every call site is guaranteed to be + // valid. + // In the former case, invalid loads can happen, but would have happened + // anyway, in the latter case, invalid loads won't happen. This prevents us + // from introducing an invalid load that wouldn't have happened in the + // original code. + // + // This set will contain all sets of indices that are loaded in the entry + // block, and thus are safe to unconditionally load in the caller. + // + // This optimization is also safe for InAlloca parameters, because it verifies + // that the address isn't captured. + GEPIndicesSet SafeToUnconditionallyLoad; + + // This set contains all the sets of indices that we are planning to promote. + // This makes it possible to limit the number of arguments added. + GEPIndicesSet ToPromote; + + // If the pointer is always valid, any load with first index 0 is valid. + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) + SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + + // First, iterate the entry block and mark loads of (geps of) arguments as + // safe. + BasicBlock &EntryBlock = Arg->getParent()->front(); + // Declare this here so we can reuse it + IndicesVector Indices; + for (Instruction &I : EntryBlock) + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Value *V = LI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { + V = GEP->getPointerOperand(); + if (V == Arg) { + // This load actually loads (part of) Arg? Check the indices then. + Indices.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + if (ConstantInt *CI = dyn_cast<ConstantInt>(*II)) + Indices.push_back(CI->getSExtValue()); + else + // We found a non-constant GEP index for this argument? Bail out + // right away, can't promote this argument at all. + return false; + + // Indices checked out, mark them as safe + MarkIndicesSafe(Indices, SafeToUnconditionallyLoad); + Indices.clear(); + } + } else if (V == Arg) { + // Direct loads are equivalent to a GEP with a single 0 index. + MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + } + } + + // Now, iterate all uses of the argument to see if there are any uses that are + // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + SmallVector<LoadInst*, 16> Loads; + IndicesVector Operands; + for (Use &U : Arg->uses()) { + User *UR = U.getUser(); + Operands.clear(); + if (LoadInst *LI = dyn_cast<LoadInst>(UR)) { + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; + Loads.push_back(LI); + // Direct loads are equivalent to a GEP with a zero index and then a load. + Operands.push_back(0); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) { + if (GEP->use_empty()) { + // Dead GEP's cause trouble later. Just remove them if we run into + // them. 
+ GEP->eraseFromParent(); + // TODO: This runs the above loop over and over again for dead GEPs + // Couldn't we just do increment the UI iterator earlier and erase the + // use? + return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR); + } + + // Ensure that all of the indices are constants. + for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); + i != e; ++i) + if (ConstantInt *C = dyn_cast<ConstantInt>(*i)) + Operands.push_back(C->getSExtValue()); + else + return false; // Not a constant operand GEP! + + // Ensure that the only users of the GEP are load instructions. + for (User *GEPU : GEP->users()) + if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) { + // Don't hack volatile/atomic loads + if (!LI->isSimple()) return false; + Loads.push_back(LI); + } else { + // Other uses than load? + return false; + } + } else { + return false; // Not a load or a GEP. + } + + // Now, see if it is safe to promote this load / loads of this GEP. Loading + // is safe if Operands, or a prefix of Operands, is marked as safe. + if (!PrefixIn(Operands, SafeToUnconditionallyLoad)) + return false; + + // See if we are already promoting a load with these indices. If not, check + // to make sure that we aren't promoting too many elements. If so, nothing + // to do. + if (ToPromote.find(Operands) == ToPromote.end()) { + if (maxElements > 0 && ToPromote.size() == maxElements) { + DEBUG(dbgs() << "argpromotion not promoting argument '" + << Arg->getName() << "' because it would require adding more " + << "than " << maxElements << " arguments to the function.\n"); + // We limit aggregate promotion to only promoting up to a fixed number + // of elements of the aggregate. + return false; + } + ToPromote.insert(std::move(Operands)); + } + } + + if (Loads.empty()) return true; // No users, this is a dead argument. + + // Okay, now we know that the argument is only used by load instructions and + // it is safe to unconditionally perform all of them. Use alias analysis to + // check to see if the pointer is guaranteed to not be modified from entry of + // the function to each of the load instructions. + + // Because there could be several/many load instructions, remember which + // blocks we know to be transparent to the load. + SmallPtrSet<BasicBlock*, 16> TranspBlocks; + + for (unsigned i = 0, e = Loads.size(); i != e; ++i) { + // Check to see if the load is invalidated from the start of the block to + // the load itself. + LoadInst *Load = Loads[i]; + BasicBlock *BB = Load->getParent(); + + MemoryLocation Loc = MemoryLocation::get(Load); + if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) + return false; // Pointer is invalidated! + + // Now check every path from the entry block to the load for transparency. + // To do this, we perform a depth first search on the inverse CFG from the + // loading block. + for (BasicBlock *P : predecessors(BB)) { + for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) + if (AAR.canBasicBlockModify(*TranspBB, Loc)) + return false; + } + } + + // If the path from the entry of the function to each load is free of + // instructions that potentially invalidate the load, we can make the + // transformation! + return true; +} + +/// DoPromotion - This method actually performs the promotion of the specified +/// arguments, and returns the new function. At this point, we know that it's +/// safe to do so. 
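+///
+/// Concretely (as implemented below): byval aggregates being transformed are
+/// split into one argument per struct element, dead pointer arguments are
+/// simply dropped, and each promoted pointer argument becomes one scalar
+/// argument per distinct set of constant GEP indices loaded from it.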
+CallGraphNode *ArgPromotion::DoPromotion(Function *F, + SmallPtrSetImpl<Argument*> &ArgsToPromote, + SmallPtrSetImpl<Argument*> &ByValArgsToTransform) { + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has modified arguments. + FunctionType *FTy = F->getFunctionType(); + std::vector<Type*> Params; + + typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; + + // ScalarizedElements - If we are promoting a pointer that has elements + // accessed out of it, keep track of which elements are accessed so that we + // can add one argument for each. + // + // Arguments that are directly loaded will have a zero element value here, to + // handle cases where there are both a direct load and GEP accesses. + // + std::map<Argument*, ScalarizeTable> ScalarizedElements; + + // OriginalLoads - Keep track of a representative load instruction from the + // original function so that we can tell the alias analysis implementation + // what the new GEP/Load instructions we are inserting look like. + // We need to keep the original loads for each argument and the elements + // of the argument that are accessed. + std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads; + + // Attribute - Keep track of the parameter attributes for the arguments + // that we are *not* promoting. For the ones that we do promote, the parameter + // attributes are lost + SmallVector<AttributeSet, 8> AttributesVec; + const AttributeSet &PAL = F->getAttributes(); + + // Add any return attributes. + if (PAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(F->getContext(), + PAL.getRetAttributes())); + + // First, determine the new argument list + unsigned ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; + ++I, ++ArgIndex) { + if (ByValArgsToTransform.count(&*I)) { + // Simple byval argument? Just add all the struct element types. + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); + Params.insert(Params.end(), STy->element_begin(), STy->element_end()); + ++NumByValArgsPromoted; + } else if (!ArgsToPromote.count(&*I)) { + // Unchanged argument + Params.push_back(I->getType()); + AttributeSet attrs = PAL.getParamAttributes(ArgIndex); + if (attrs.hasAttributes(ArgIndex)) { + AttrBuilder B(attrs, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Params.size(), B)); + } + } else if (I->use_empty()) { + // Dead argument (which are always marked as promotable) + ++NumArgumentsDead; + } else { + // Okay, this is being promoted. This means that the only uses are loads + // or GEPs which are only used by loads + + // In this table, we will track which indices are loaded from the argument + // (where direct loads are tracked as no indices). + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + Type *SrcTy; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + SrcTy = L->getType(); + else + SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); + IndicesVector Indices; + Indices.reserve(UI->getNumOperands() - 1); + // Since loads will only have a single operand, and GEPs only a single + // non-index operand, this will record direct loads without any indices, + // and gep+loads with the GEP indices. 
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + ArgIndices.insert(std::make_pair(SrcTy, Indices)); + LoadInst *OrigLoad; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + OrigLoad = L; + else + // Take any load, we will use it only to update Alias Analysis + OrigLoad = cast<LoadInst>(UI->user_back()); + OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; + } + + // Add a parameter to the function for each element passed in. + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + // not allowed to dereference ->begin() if size() is 0 + Params.push_back(GetElementPtrInst::getIndexedType( + cast<PointerType>(I->getType()->getScalarType())->getElementType(), + SI->second)); + assert(Params.back()); + } + + if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) + ++NumArgumentsPromoted; + else + ++NumAggregatesPromoted; + } + } + + // Add any function attributes. + if (PAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(FTy->getContext(), + PAL.getFnAttributes())); + + Type *RetTy = FTy->getReturnType(); + + // Construct the new function type using the new arguments. + FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); + + // Create the new function body and insert it into the module. + Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); + NF->copyAttributesFrom(F); + + // Patch the pointer to LLVM function in debug info descriptor. + NF->setSubprogram(F->getSubprogram()); + F->setSubprogram(nullptr); + + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + << "From: " << *F); + + // Recompute the parameter attributes list based on the new arguments for + // the function. + NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); + AttributesVec.clear(); + + F->getParent()->getFunctionList().insert(F->getIterator(), NF); + NF->takeName(F); + + // Get the callgraph information that we need to update to reflect our + // changes. + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in the loaded pointers. + // + SmallVector<Value*, 16> Args; + while (!F->use_empty()) { + CallSite CS(F->user_back()); + assert(CS.getCalledFunction() == F); + Instruction *Call = CS.getInstruction(); + const AttributeSet &CallPAL = CS.getAttributes(); + + // Add any return attributes. + if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(F->getContext(), + CallPAL.getRetAttributes())); + + // Loop over the operands, inserting GEP and loads in the caller as + // appropriate. + CallSite::arg_iterator AI = CS.arg_begin(); + ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI, ++ArgIndex) + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + Args.push_back(*AI); // Unmodified argument + + if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); + AttributesVec. 
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B)); + } + } else if (ByValArgsToTransform.count(&*I)) { + // Emit a GEP and load for each element of the struct. + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + StructType *STy = cast<StructType>(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create( + STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); + // TODO: Tell AA about the new values? + Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); + } + } else if (!I->use_empty()) { + // Non-dead argument: insert GEPs and loads as appropriate. + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + // Store the Value* version of the indices in here, but declare it now + // for reuse. + std::vector<Value*> Ops; + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + Value *V = *AI; + LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)]; + if (!SI->second.empty()) { + Ops.reserve(SI->second.size()); + Type *ElTy = V->getType(); + for (IndicesVector::const_iterator II = SI->second.begin(), + IE = SI->second.end(); + II != IE; ++II) { + // Use i32 to index structs, and i64 for others (pointers/arrays). + // This satisfies GEP constraints. + Type *IdxTy = (ElTy->isStructTy() ? + Type::getInt32Ty(F->getContext()) : + Type::getInt64Ty(F->getContext())); + Ops.push_back(ConstantInt::get(IdxTy, *II)); + // Keep track of the type we're currently indexing. + ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); + } + // And create a GEP to extract those indices. + V = GetElementPtrInst::Create(SI->first, V, Ops, + V->getName() + ".idx", Call); + Ops.clear(); + } + // Since we're replacing a load make sure we take the alignment + // of the previous load. + LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); + newLoad->setAlignment(OrigLoad->getAlignment()); + // Transfer the AA info too. + AAMDNodes AAInfo; + OrigLoad->getAAMetadata(AAInfo); + newLoad->setAAMetadata(AAInfo); + + Args.push_back(newLoad); + } + } + + // Push any varargs arguments on the list. + for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + Args.push_back(*AI); + if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); + } + } + + // Add any function attributes. + if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(Call->getContext(), + CallPAL.getFnAttributes())); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(), + AttributesVec)); + } else { + New = CallInst::Create(NF, Args, "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(), + AttributesVec)); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + New->setDebugLoc(Call->getDebugLoc()); + Args.clear(); + AttributesVec.clear(); + + // Update the callgraph to know that the callsite has been transformed. 
+ CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); + + if (!Call->use_empty()) { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. + // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + // If this is an unmodified argument, move the name and users over to the + // new version. + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); + ++I2; + continue; + } + + if (ByValArgsToTransform.count(&*I)) { + // In the callee, we create an alloca, and store each of the new incoming + // arguments into the alloca. + Instruction *InsertPt = &NF->begin()->front(); + + // Just add all the struct element types. + Type *AgTy = cast<PointerType>(I->getType())->getElementType(); + Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); + StructType *STy = cast<StructType>(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; + + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create( + AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), + InsertPt); + I2->setName(I->getName()+"."+Twine(i)); + new StoreInst(&*I2++, Idx, InsertPt); + } + + // Anything that used the arg should now use the alloca. + I->replaceAllUsesWith(TheAlloca); + TheAlloca->takeName(&*I); + + // If the alloca is used in a call, we must clear the tail flag since + // the callee now uses an alloca from the caller. + for (User *U : TheAlloca->users()) { + CallInst *Call = dyn_cast<CallInst>(U); + if (!Call) + continue; + Call->setTailCall(false); + } + continue; + } + + if (I->use_empty()) + continue; + + // Otherwise, if we promoted this argument, then all users are load + // instructions (or GEPs with only load users), and all loads should be + // using the new argument that we added. 
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + + while (!I->use_empty()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { + assert(ArgIndices.begin()->second.empty() && + "Load element should sort to front!"); + I2->setName(I->getName()+".val"); + LI->replaceAllUsesWith(&*I2); + LI->eraseFromParent(); + DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"); + } else { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); + IndicesVector Operands; + Operands.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); + + // GEPs with a single 0 index can be merged with direct loads + if (Operands.size() == 1 && Operands.front() == 0) + Operands.clear(); + + Function::arg_iterator TheArg = I2; + for (ScalarizeTable::iterator It = ArgIndices.begin(); + It->second != Operands; ++It, ++TheArg) { + assert(It != ArgIndices.end() && "GEP not handled??"); + } + + std::string NewName = I->getName(); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + NewName += "." + utostr(Operands[i]); + } + NewName += ".val"; + TheArg->setName(NewName); + + DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"); + + // All of the uses must be load instructions. Replace them all with + // the argument specified by ArgNo. + while (!GEP->use_empty()) { + LoadInst *L = cast<LoadInst>(GEP->user_back()); + L->replaceAllUsesWith(&*TheArg); + L->eraseFromParent(); + } + GEP->eraseFromParent(); + } + } + + // Increment I2 past all of the arguments added for this promoted pointer. + std::advance(I2, ArgIndices.size()); + } + + NF_CGN->stealCalledFunctionsFrom(CG[F]); + + // Now that the old function is dead, delete it. If there is a dangling + // reference to the CallgraphNode, just leave the dead function around for + // someone else to nuke. + CallGraphNode *CGN = CG[F]; + if (CGN->getNumReferences() == 0) + delete CG.removeFunctionFromModule(CGN); + else + F->setLinkage(Function::ExternalLinkage); + + return NF_CGN; +} + +bool ArgPromotion::doInitialization(CallGraph &CG) { + return CallGraphSCCPass::doInitialization(CG); +} diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp new file mode 100644 index 0000000..6af1043 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp @@ -0,0 +1,47 @@ +//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// NOTE: DO NOT USE THIS IF AVOIDABLE +// +// This pass is a nonce pass intended to allow manipulation of the implicitly +// nesting pass manager. For example, it can be used to cause a CGSCC pass +// manager to be closed prior to running a new collection of function passes. +// +// FIXME: This is a huge HACK. This should be removed when the pass manager's +// nesting is made explicit instead of implicit. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" +using namespace llvm; + +namespace { +/// \brief A nonce module pass used to place a barrier in a pass manager. +/// +/// There is no mechanism for ending a CGSCC pass manager once one is started. +/// This prevents extension points from having clear deterministic ordering +/// when they are phrased as non-module passes. +class BarrierNoop : public ModulePass { +public: + static char ID; // Pass identification. + + BarrierNoop() : ModulePass(ID) { + initializeBarrierNoopPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { return false; } +}; +} + +ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); } + +char BarrierNoop::ID = 0; +INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass", + false, false) diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp new file mode 100644 index 0000000..0aa49d6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -0,0 +1,222 @@ +//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to a pass that merges duplicate global +// constants together into a single constant that is shared. This is useful +// because some passes (ie TraceValues) insert a lot of string constants into +// the program, regardless of whether or not an existing string is available. +// +// Algorithm: ConstantMerge is designed to build up a map of available constants +// and eliminate duplicates when it is initialized. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "constmerge" + +STATISTIC(NumMerged, "Number of global constants merged"); + +namespace { + struct ConstantMerge : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ConstantMerge() : ModulePass(ID) { + initializeConstantMergePass(*PassRegistry::getPassRegistry()); + } + + // For this pass, process all of the globals in the module, eliminating + // duplicate constants. + bool runOnModule(Module &M) override; + + // Return true iff we can determine the alignment of this global variable. + bool hasKnownAlignment(GlobalVariable *GV) const; + + // Return the alignment of the global, including converting the default + // alignment to a concrete value. + unsigned getAlignment(GlobalVariable *GV) const; + + }; +} + +char ConstantMerge::ID = 0; +INITIALIZE_PASS(ConstantMerge, "constmerge", + "Merge Duplicate Global Constants", false, false) + +ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + + + +/// Find values that are marked as llvm.used. 
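+/// The llvm.used variable holds an array of pointers to the used globals,
+/// e.g. (illustrative):
+///   @llvm.used = appending global [1 x i8*]
+///                [i8* bitcast (i32* @g to i8*)], section "llvm.metadata"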
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
+  if (!LLVMUsed) return;
+  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+    Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+    GlobalValue *GV = cast<GlobalValue>(Operand);
+    UsedValues.insert(GV);
+  }
+}
+
+// Returns true if A is a better canonical choice than B.
+static bool IsBetterCanonical(const GlobalVariable &A,
+                              const GlobalVariable &B) {
+  if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+    return true;
+
+  if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+    return false;
+
+  return A.hasUnnamedAddr();
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+  unsigned Align = GV->getAlignment();
+  if (Align)
+    return Align;
+  return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
+}
+
+bool ConstantMerge::runOnModule(Module &M) {
+
+  // Find all the globals that are marked "used". These cannot be merged.
+  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
+  // Map unique constants to globals.
+  DenseMap<Constant *, GlobalVariable *> CMap;
+
+  // Replacements - This vector contains a list of replacements to perform.
+  SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+
+  bool MadeChange = false;
+
+  // Iterate constant merging while we are still making progress. Merging two
+  // constants together may allow us to merge other constants together if the
+  // second level constants have initializers which point to the globals that
+  // were just merged.
+  while (1) {
+
+    // First: Find the canonical constants others will be merged with.
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+         GVI != E; ) {
+      GlobalVariable *GV = &*GVI++;
+
+      // If this GV is dead, remove it.
+      GV->removeDeadConstantUsers();
+      if (GV->use_empty() && GV->hasLocalLinkage()) {
+        GV->eraseFromParent();
+        continue;
+      }
+
+      // Only process constants with initializers in the default address space.
+      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
+        continue;
+
+      // This transformation is legal for weak ODR globals in the sense it
+      // doesn't change semantics, but we really don't want to perform it
+      // anyway; it's likely to pessimize code generation, and some tools
+      // (like the Darwin linker in cases involving CFString) don't expect it.
+      if (GV->isWeakForLinker())
+        continue;
+
+      Constant *Init = GV->getInitializer();
+
+      // Check to see if the initializer is already known.
+      GlobalVariable *&Slot = CMap[Init];
+
+      // If this is the first constant we find or if the old one is local,
+      // replace with the current one. If the current one is externally
+      // visible it cannot be replaced, but it can be the canonical constant
+      // we merge with.
+      if (!Slot || IsBetterCanonical(*GV, *Slot))
+        Slot = GV;
+    }
+
+    // Second: identify all globals that can be merged together, filling in
+    // the Replacements vector. We cannot do the replacement in this pass
+    // because doing so may cause initializers of other globals to be
+    // rewritten, invalidating the Constant* pointers in CMap.
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+         GVI != E; ) {
+      GlobalVariable *GV = &*GVI++;
+
+      // Only process constants with initializers in the default address space.
+      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
+        continue;
+
+      // We can only replace constants with local linkage.
+      if (!GV->hasLocalLinkage())
+        continue;
+
+      Constant *Init = GV->getInitializer();
+
+      // Check to see if the initializer is already known.
+      GlobalVariable *Slot = CMap[Init];
+
+      if (!Slot || Slot == GV)
+        continue;
+
+      if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+        continue;
+
+      if (!GV->hasUnnamedAddr())
+        Slot->setUnnamedAddr(false);
+
+      // Make all uses of the duplicate constant use the canonical version.
+      Replacements.push_back(std::make_pair(GV, Slot));
+    }
+
+    if (Replacements.empty())
+      return MadeChange;
+    CMap.clear();
+
+    // Now that we have figured out which replacements must be made, do them
+    // all now. This avoids invalidating the pointers in CMap, which are
+    // unneeded now.
+    for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+      // Bump the alignment if necessary.
+      if (Replacements[i].first->getAlignment() ||
+          Replacements[i].second->getAlignment()) {
+        Replacements[i].second->setAlignment(
+            std::max(getAlignment(Replacements[i].first),
+                     getAlignment(Replacements[i].second)));
+      }
+
+      // Eliminate any uses of the dead global.
+      Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
+
+      // Delete the global value from the module.
+      assert(Replacements[i].first->hasLocalLinkage() &&
+             "Refusing to delete an externally visible global variable.");
+      Replacements[i].first->eraseFromParent();
+    }
+
+    NumMerged += Replacements.size();
+    Replacements.clear();
+  }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
new file mode 100644
index 0000000..5bbb751
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -0,0 +1,166 @@
+//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exports all llvm.bitsets found in the module in the form of a
+// __cfi_check function, which can be used to verify cross-DSO call targets.
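+// The emitted function has the signature
+//   void __cfi_check(uint64 CallSiteTypeId, i8* Addr)
+// (built below in buildCFICheck) and traps unless Addr is a member of the
+// bitset identified by CallSiteTypeId.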
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "cross-dso-cfi" + +STATISTIC(TypeIds, "Number of unique type identifiers"); + +namespace { + +struct CrossDSOCFI : public ModulePass { + static char ID; + CrossDSOCFI() : ModulePass(ID) { + initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + MDNode *VeryLikelyWeights; + + ConstantInt *extractBitSetTypeId(MDNode *MD); + void buildCFICheck(); + + bool doInitialization(Module &M) override; + bool runOnModule(Module &M) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, + false) +INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false) +char CrossDSOCFI::ID = 0; + +ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; } + +bool CrossDSOCFI::doInitialization(Module &Mod) { + M = &Mod; + VeryLikelyWeights = + MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1); + + return false; +} + +/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode. +ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { + // This check excludes vtables for classes inside anonymous namespaces. + auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0)); + if (!TM) + return nullptr; + auto C = dyn_cast_or_null<ConstantInt>(TM->getValue()); + if (!C) return nullptr; + // We are looking for i64 constants. + if (C->getBitWidth() != 64) return nullptr; + + // Sanity check. + auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1)); + // Can be null if a function was removed by an optimization. + if (FM) { + auto F = dyn_cast<Function>(FM->getValue()); + // But can never be a function declaration. + assert(!F || !F->isDeclaration()); + (void)F; // Suppress unused variable warning in the no-asserts build. + } + return C; +} + +/// buildCFICheck - emits __cfi_check for the current module. +void CrossDSOCFI::buildCFICheck() { + // FIXME: verify that __cfi_check ends up near the end of the code section, + // but before the jump slots created in LowerBitSets. 
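+  // The generated code has roughly this shape (illustrative sketch, not
+  // actual syntax):
+  //   switch (CallSiteTypeId) {
+  //   case TypeId1: if (llvm.bitset.test(Addr, TypeId1)) goto exit; else goto trap;
+  //   ...
+  //   default: goto trap;
+  //   }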
+ llvm::DenseSet<uint64_t> BitSetIds; + NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets"); + + if (BitSetNM) + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) + if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I))) + BitSetIds.insert(TypeId->getZExtValue()); + + LLVMContext &Ctx = M->getContext(); + Constant *C = M->getOrInsertFunction( + "__cfi_check", + FunctionType::get( + Type::getVoidTy(Ctx), + {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))}, + false)); + Function *F = dyn_cast<Function>(C); + F->setAlignment(4096); + auto args = F->arg_begin(); + Argument &CallSiteTypeId = *(args++); + CallSiteTypeId.setName("CallSiteTypeId"); + Argument &Addr = *(args++); + Addr.setName("Addr"); + assert(args == F->arg_end()); + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); + + BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F); + IRBuilder<> IRBTrap(TrapBB); + Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap); + llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + IRBTrap.CreateUnreachable(); + + BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); + IRBuilder<> IRBExit(ExitBB); + IRBExit.CreateRetVoid(); + + IRBuilder<> IRB(BB); + SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size()); + for (uint64_t TypeId : BitSetIds) { + ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); + BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); + IRBuilder<> IRBTest(TestBB); + Function *BitsetTestFn = + Intrinsic::getDeclaration(M, Intrinsic::bitset_test); + + Value *Test = IRBTest.CreateCall( + BitsetTestFn, {&Addr, MetadataAsValue::get( + Ctx, ConstantAsMetadata::get(CaseTypeId))}); + BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB); + BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); + + SI->addCase(CaseTypeId, TestBB); + ++TypeIds; + } +} + +bool CrossDSOCFI::runOnModule(Module &M) { + if (M.getModuleFlag("Cross-DSO CFI") == nullptr) + return false; + buildCFICheck(); + return true; +} diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp new file mode 100644 index 0000000..4de3d95 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -0,0 +1,1133 @@ +//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass deletes dead arguments from internal functions. Dead argument +// elimination removes arguments which are directly dead, as well as arguments +// only passed into function calls as dead arguments of other functions. This +// pass also deletes dead return values in a similar way. +// +// This pass is often useful as a cleanup pass to run after aggressive +// interprocedural passes, which add possibly-dead arguments or return values. 
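+//
+// For example (illustrative): an internal function
+//   static int f(int used, int unused) { return used; }
+// whose second argument is never read is rewritten, along with all of its
+// callers, to the single-argument form.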
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <map> +#include <set> +#include <tuple> +using namespace llvm; + +#define DEBUG_TYPE "deadargelim" + +STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); +STATISTIC(NumArgumentsReplacedWithUndef, + "Number of unread args replaced with undef"); +namespace { + /// DAE - The dead argument elimination pass. + /// + class DAE : public ModulePass { + public: + + /// Struct that represents (part of) either a return value or a function + /// argument. Used so that arguments and return values can be used + /// interchangeably. + struct RetOrArg { + RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), + IsArg(IsArg) {} + const Function *F; + unsigned Idx; + bool IsArg; + + /// Make RetOrArg comparable, so we can put it into a map. + bool operator<(const RetOrArg &O) const { + return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg); + } + + /// Make RetOrArg comparable, so we can easily iterate the multimap. + bool operator==(const RetOrArg &O) const { + return F == O.F && Idx == O.Idx && IsArg == O.IsArg; + } + + std::string getDescription() const { + return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) + + " of function " + F->getName()).str(); + } + }; + + /// Liveness enum - During our initial pass over the program, we determine + /// that things are either alive or maybe alive. We don't mark anything + /// explicitly dead (even if we know they are), since anything not alive + /// with no registered uses (in Uses) will never be marked alive and will + /// thus become dead in the end. + enum Liveness { Live, MaybeLive }; + + /// Convenience wrapper + RetOrArg CreateRet(const Function *F, unsigned Idx) { + return RetOrArg(F, Idx, false); + } + /// Convenience wrapper + RetOrArg CreateArg(const Function *F, unsigned Idx) { + return RetOrArg(F, Idx, true); + } + + typedef std::multimap<RetOrArg, RetOrArg> UseMap; + /// This maps a return value or argument to any MaybeLive return values or + /// arguments it uses. This allows the MaybeLive values to be marked live + /// when any of its users is marked live. + /// For example (indices are left out for clarity): + /// - Uses[ret F] = ret G + /// This means that F calls G, and F returns the value returned by G. + /// - Uses[arg F] = ret G + /// This means that some function calls G and passes its result as an + /// argument to F. + /// - Uses[ret F] = arg F + /// This means that F returns one of its own arguments. + /// - Uses[arg F] = arg G + /// This means that G calls F and passes one of its own (G's) arguments + /// directly to F. 
+    UseMap Uses;
+
+    typedef std::set<RetOrArg> LiveSet;
+    typedef std::set<const Function*> LiveFuncSet;
+
+    /// This set contains all values that have been determined to be live.
+    LiveSet LiveValues;
+    /// This set contains all functions that cannot be changed in any way.
+    LiveFuncSet LiveFunctions;
+
+    typedef SmallVector<RetOrArg, 5> UseVector;
+
+  protected:
+    // DAH uses this to specify a different ID.
+    explicit DAE(char &ID) : ModulePass(ID) {}
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    DAE() : ModulePass(ID) {
+      initializeDAEPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnModule(Module &M) override;
+
+    virtual bool ShouldHackArguments() const { return false; }
+
+  private:
+    Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+    Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+                       unsigned RetValNum = -1U);
+    Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+
+    void SurveyFunction(const Function &F);
+    void MarkValue(const RetOrArg &RA, Liveness L,
+                   const UseVector &MaybeLiveUses);
+    void MarkLive(const RetOrArg &RA);
+    void MarkLive(const Function &F);
+    void PropagateLiveness(const RetOrArg &RA);
+    bool RemoveDeadStuffFromFunction(Function *F);
+    bool DeleteDeadVarargs(Function &Fn);
+    bool RemoveDeadArgumentsFromCallers(Function &Fn);
+  };
+}
+
+
+char DAE::ID = 0;
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
+
+namespace {
+  /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
+  /// deletes arguments to functions which are external. This is only for use
+  /// by bugpoint.
+  struct DAH : public DAE {
+    static char ID;
+    DAH() : DAE(ID) {}
+
+    bool ShouldHackArguments() const override { return true; }
+  };
+}
+
+char DAH::ID = 0;
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+                "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+                false, false)
+
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
+
+/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
+/// llvm.vastart is never called, the varargs list is dead for the function.
+bool DAE::DeleteDeadVarargs(Function &Fn) {
+  assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
+  if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+
+  // Ensure that the function is only directly called.
+  if (Fn.hasAddressTaken())
+    return false;
+
+  // Don't touch naked functions. The assembly might be using an argument, or
+  // otherwise rely on the frame layout in a way that this analysis will not
+  // see.
+  if (Fn.hasFnAttribute(Attribute::Naked)) {
+    return false;
+  }
+
+  // Okay, we know we can transform this function if it is safe to do so. Scan
+  // its body looking for calls marked musttail or calls to llvm.vastart.
+  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      CallInst *CI = dyn_cast<CallInst>(I);
+      if (!CI)
+        continue;
+      if (CI->isMustTailCall())
+        return false;
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+        if (II->getIntrinsicID() == Intrinsic::vastart)
+          return false;
+      }
+    }
+  }
+
+  // If we get here, there are no calls to llvm.vastart in the function body;
+  // remove the "..." and adjust all the calls.
+
+  // Start by computing a new prototype for the function, which is the same as
+  // the old function, but doesn't have isVarArg set.
+  FunctionType *FTy = Fn.getFunctionType();
+
+  std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+  FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
+                                         Params, false);
+  unsigned NumArgs = Params.size();
+
+  // Create the new function body and insert it into the module...
+  Function *NF = Function::Create(NFTy, Fn.getLinkage());
+  NF->copyAttributesFrom(&Fn);
+  Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
+  NF->takeName(&Fn);
+
+  // Loop over all of the callers of the function, transforming the call sites
+  // to pass in a smaller number of arguments into the new function.
+  //
+  std::vector<Value*> Args;
+  for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
+    CallSite CS(*I++);
+    if (!CS)
+      continue;
+    Instruction *Call = CS.getInstruction();
+
+    // Pass all the same arguments.
+    Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+
+    // Drop any attributes that were on the vararg arguments.
+    AttributeSet PAL = CS.getAttributes();
+    if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+      SmallVector<AttributeSet, 8> AttributesVec;
+      for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+        AttributesVec.push_back(PAL.getSlotAttributes(i));
+      if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+        AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+                                                  PAL.getFnAttributes()));
+      PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+    }
+
+    Instruction *New;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+                               Args, "", Call);
+      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
+      cast<InvokeInst>(New)->setAttributes(PAL);
+    } else {
+      New = CallInst::Create(NF, Args, "", Call);
+      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
+      cast<CallInst>(New)->setAttributes(PAL);
+      if (cast<CallInst>(Call)->isTailCall())
+        cast<CallInst>(New)->setTailCall();
+    }
+    New->setDebugLoc(Call->getDebugLoc());
+
+    Args.clear();
+
+    if (!Call->use_empty())
+      Call->replaceAllUsesWith(New);
+
+    New->takeName(Call);
+
+    // Finally, remove the old call from the program, reducing the use-count of
+    // F.
+    Call->eraseFromParent();
+  }
+
+  // Since we have now created the new function, splice the body of the old
+  // function right into the new function, leaving the old rotting hulk of the
+  // function empty.
+  NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+
+  // Loop over the argument list, transferring uses of the old arguments over
+  // to the new arguments, transferring the names over as well.
+  for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
+       I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+    // Move the name and users over to the new version.
+    I->replaceAllUsesWith(&*I2);
+    I2->takeName(&*I);
+  }
+
+  // Patch the pointer to LLVM function in debug info descriptor.
+  NF->setSubprogram(Fn.getSubprogram());
+
+  // Fix up any BlockAddresses that refer to the function.
+  Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
+  // Delete the bitcast that we just created, so that NF does not
+  // appear to be address-taken.
+  NF->removeDeadConstantUsers();
+  // Finally, nuke the old function.
+  Fn.eraseFromParent();
+  return true;
+}
+
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and changes the corresponding caller arguments
+/// to be undef instead.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+  // We cannot change the arguments if this TU does not define the function or
+  // if the linker may choose a function body from another TU, even if the
+  // nominal linkage indicates that other copies of the function have the same
+  // semantics. In the example below, the dead load from %p may not have been
+  // eliminated from the linker-chosen copy of f, so replacing %p with undef
+  // in callers may introduce undefined behavior.
+  //
+  // define linkonce_odr void @f(i32* %p) {
+  //   %v = load i32 %p
+  //   ret void
+  // }
+  if (!Fn.isStrongDefinitionForLinker())
+    return false;
+
+  // Functions with local linkage should already have been handled, except the
+  // fragile (variadic) ones which we can improve here.
+  if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+    return false;
+
+  // Don't touch naked functions. The assembly might be using an argument, or
+  // otherwise rely on the frame layout in a way that this analysis will not
+  // see.
+  if (Fn.hasFnAttribute(Attribute::Naked))
+    return false;
+
+  if (Fn.use_empty())
+    return false;
+
+  SmallVector<unsigned, 8> UnusedArgs;
+  for (Argument &Arg : Fn.args()) {
+    if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+      UnusedArgs.push_back(Arg.getArgNo());
+  }
+
+  if (UnusedArgs.empty())
+    return false;
+
+  bool Changed = false;
+
+  for (Use &U : Fn.uses()) {
+    CallSite CS(U.getUser());
+    if (!CS || !CS.isCallee(&U))
+      continue;
+
+    // Now go through all unused args and replace them with "undef".
+    for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+      unsigned ArgNo = UnusedArgs[I];
+
+      Value *Arg = CS.getArgument(ArgNo);
+      CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+      ++NumArgumentsReplacedWithUndef;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// Convenience function that returns the number of return values. It returns
+/// 0 for void functions, the number of elements for functions returning a
+/// struct or an array, and 1 otherwise.
+static unsigned NumRetVals(const Function *F) {
+  Type *RetTy = F->getReturnType();
+  if (RetTy->isVoidTy())
+    return 0;
+  else if (StructType *STy = dyn_cast<StructType>(RetTy))
+    return STy->getNumElements();
+  else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+    return ATy->getNumElements();
+  else
+    return 1;
+}
+
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only the top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+  Type *RetTy = F->getReturnType();
+  assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+  if (StructType *STy = dyn_cast<StructType>(RetTy))
+    return STy->getElementType(Idx);
+  else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+    return ATy->getElementType();
+  else
+    return RetTy;
+}
+
+/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
+/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
+/// liveness of Use.
+DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+  // We're live if our use or its Function is already marked as live.
+  if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
+    return Live;
+
+  // We're maybe live otherwise, but remember that we must become live if
+  // Use becomes live.
+  MaybeLiveUses.push_back(Use);
+  return MaybeLive;
+}
+
+
+/// SurveyUse - This looks at a single use of an argument or return value
+/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
+/// if it causes the used value to become MaybeLive.
+///
+/// RetValNum is the return value number to use when this use is used in a
+/// return instruction. This is used in the recursion; outside callers should
+/// always leave it at its default value (-1U).
+DAE::Liveness DAE::SurveyUse(const Use *U,
+                             UseVector &MaybeLiveUses, unsigned RetValNum) {
+  const User *V = U->getUser();
+  if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+    // The value is returned from a function. It's only live when the
+    // function's return value is live. We use RetValNum here, for the case
+    // that U is really a use of an insertvalue instruction that uses the
+    // original Use.
+    const Function *F = RI->getParent()->getParent();
+    if (RetValNum != -1U) {
+      RetOrArg Use = CreateRet(F, RetValNum);
+      // We might be live, depending on the liveness of Use.
+      return MarkIfNotLive(Use, MaybeLiveUses);
+    } else {
+      DAE::Liveness Result = MaybeLive;
+      for (unsigned i = 0; i < NumRetVals(F); ++i) {
+        RetOrArg Use = CreateRet(F, i);
+        // We might be live, depending on the liveness of Use. If any
+        // sub-value is live, then the entire value is considered live. This
+        // is a conservative choice, and better tracking is possible.
+        DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+        if (Result != Live)
+          Result = SubResult;
+      }
+      return Result;
+    }
+  }
+  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+    if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+        && IV->hasIndices())
+      // The use we are examining is inserted into an aggregate. Our liveness
+      // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+      RetValNum = *IV->idx_begin();
+
+    // Note that if we are used as the aggregate operand to the insertvalue,
+    // we don't change RetValNum, but do survey all our uses.
+
+    Liveness Result = MaybeLive;
+    for (const Use &UU : IV->uses()) {
+      Result = SurveyUse(&UU, MaybeLiveUses, RetValNum);
+      if (Result == Live)
+        break;
+    }
+    return Result;
+  }
+
+  if (auto CS = ImmutableCallSite(V)) {
+    const Function *F = CS.getCalledFunction();
+    if (F) {
+      // Used in a direct call.
+
+      // The function argument is live if it is used as a bundle operand.
+      if (CS.isBundleOperand(U))
+        return Live;
+
+      // Find the argument number. We know for sure that this use is an
+      // argument, since if it were the function operand this would be an
+      // indirect call, and we know we can't be looking at a value of label
+      // type (as in an invoke instruction's successor blocks).
+      unsigned ArgNo = CS.getArgumentNo(U);
+
+      if (ArgNo >= F->getFunctionType()->getNumParams())
+        // The value is passed in through a vararg! Must be live.
+        return Live;
+
+      assert(CS.getArgument(ArgNo)
+             == CS->getOperand(U->getOperandNo())
+             && "Argument is not where we expected it");
+
+      // Value passed to a normal call. It's only live when the corresponding
+      // argument to the called function turns out live.
+      RetOrArg Use = CreateArg(F, ArgNo);
+      return MarkIfNotLive(Use, MaybeLiveUses);
+    }
+  }
+  // Used in any other way? Value must be live.
+  return Live;
+}
+
+/// SurveyUses - This looks at all the uses of the given value.
+/// Returns the Liveness deduced from the uses of this value.
+///
+/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
+/// the result is Live, MaybeLiveUses might be modified but its content should
+/// be ignored (since it might not be complete).
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+  // Assume it's dead (which will only hold if there are no uses at all).
+  Liveness Result = MaybeLive;
+  // Check each use.
+  for (const Use &U : V->uses()) {
+    Result = SurveyUse(&U, MaybeLiveUses);
+    if (Result == Live)
+      break;
+  }
+  return Result;
+}
+
+// SurveyFunction - This performs the initial survey of the specified function,
+// checking whether it uses any of its incoming arguments or whether any
+// callers use the return value. This fills in the LiveValues set and Uses
+// map.
+//
+// We consider arguments of non-internal functions to be intrinsically alive as
+// well as arguments to functions which have their "address taken".
+//
+void DAE::SurveyFunction(const Function &F) {
+  // Functions with inalloca parameters are expecting args in a particular
+  // register and memory layout.
+  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+    MarkLive(F);
+    return;
+  }
+
+  // Don't touch naked functions. The assembly might be using an argument, or
+  // otherwise rely on the frame layout in a way that this analysis will not
+  // see.
+  if (F.hasFnAttribute(Attribute::Naked)) {
+    MarkLive(F);
+    return;
+  }
+
+  unsigned RetCount = NumRetVals(&F);
+  // Assume all return values are dead
+  typedef SmallVector<Liveness, 5> RetVals;
+  RetVals RetValLiveness(RetCount, MaybeLive);
+
+  typedef SmallVector<UseVector, 5> RetUses;
+  // These vectors map each return value to the uses that make it MaybeLive, so
+  // we can add those to the Uses map if the return value really turns out to
+  // be MaybeLive. Initialized to a list of RetCount empty lists.
+  RetUses MaybeLiveRetUses(RetCount);
+
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+      if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
+          != F.getFunctionType()->getReturnType()) {
+        // We don't support old style multiple return values.
+        MarkLive(F);
+        return;
+      }
+
+  if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+    MarkLive(F);
+    return;
+  }
+
+  DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+  // Keep track of the number of live retvals, so we can skip checks once all
+  // of them turn out to be live.
+  unsigned NumLiveRetVals = 0;
+  // Loop over all uses of the function.
+  for (const Use &U : F.uses()) {
+    // If the function is PASSED IN as an argument, its address has been
+    // taken.
+    ImmutableCallSite CS(U.getUser());
+    if (!CS || !CS.isCallee(&U)) {
+      MarkLive(F);
+      return;
+    }
+
+    // If this use is anything other than a call site, the function is alive.
+    const Instruction *TheCall = CS.getInstruction();
+    if (!TheCall) {   // Not a direct call site?
+      MarkLive(F);
+      return;
+    }
+
+    // If we end up here, we are looking at a direct call to our function.
+
+    // Now, check how our return value(s) is/are used in this caller. Don't
+    // bother checking return values if all of them are live already.
+    if (NumLiveRetVals == RetCount)
+      continue;
+
+    // Check all uses of the return value.
+    for (const Use &U : TheCall->uses()) {
+      if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+        // This use uses a part of our return value, survey the uses of
+        // that part and store the results for this index only.
+        unsigned Idx = *Ext->idx_begin();
+        if (RetValLiveness[Idx] != Live) {
+          RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+          if (RetValLiveness[Idx] == Live)
+            NumLiveRetVals++;
+        }
+      } else {
+        // Used by something other than extractvalue. Survey, but assume that
+        // the result applies to all sub-values.
+        UseVector MaybeLiveAggregateUses;
+        if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
+          NumLiveRetVals = RetCount;
+          RetValLiveness.assign(RetCount, Live);
+          break;
+        } else {
+          for (unsigned i = 0; i != RetCount; ++i) {
+            if (RetValLiveness[i] != Live)
+              MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+                                         MaybeLiveAggregateUses.end());
+          }
+        }
+      }
+    }
+  }
+
+  // Now we've inspected all callers, record the liveness of our return values.
+  for (unsigned i = 0; i != RetCount; ++i)
+    MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+
+  DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+
+  // Now, check all of our arguments.
+  unsigned i = 0;
+  UseVector MaybeLiveArgUses;
+  for (Function::const_arg_iterator AI = F.arg_begin(),
+       E = F.arg_end(); AI != E; ++AI, ++i) {
+    Liveness Result;
+    if (F.getFunctionType()->isVarArg()) {
+      // Variadic functions will already have a va_arg function expanded
+      // inside them, making them potentially very sensitive to ABI changes
+      // resulting from removing arguments entirely, so don't remove arguments
+      // from them. For example AArch64 handles register and stack HFAs very
+      // differently, and this is reflected in the IR which has already been
+      // generated.
+      Result = Live;
+    } else {
+      // See what the effect of this use is (recording any uses that cause
+      // MaybeLive in MaybeLiveArgUses).
+      Result = SurveyUses(&*AI, MaybeLiveArgUses);
+    }
+
+    // Mark the result.
+    MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+    // Clear the vector again for the next iteration.
+    MaybeLiveArgUses.clear();
+  }
+}
+
+/// MarkValue - This function marks the liveness of RA depending on L. If L is
+/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in
+/// Uses, such that RA will be marked live if any use in MaybeLiveUses gets
+/// marked live later on.
+void DAE::MarkValue(const RetOrArg &RA, Liveness L,
+                    const UseVector &MaybeLiveUses) {
+  switch (L) {
+    case Live: MarkLive(RA); break;
+    case MaybeLive:
+    {
+      // Note any uses of this value, so this return value can be
+      // marked live whenever one of the uses becomes live.
+      for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
+           UE = MaybeLiveUses.end(); UI != UE; ++UI)
+        Uses.insert(std::make_pair(*UI, RA));
+      break;
+    }
+  }
+}
+
+/// MarkLive - Mark the given Function as alive, meaning that it cannot be
+/// changed in any way. Additionally, mark any values that are used as this
+/// function's parameters or by its return values (according to Uses) live as
+/// well.
+void DAE::MarkLive(const Function &F) {
+  DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+  // Mark the function as live.
+  LiveFunctions.insert(&F);
+  // Mark all arguments as live.
+  for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+    PropagateLiveness(CreateArg(&F, i));
+  // Mark all return values as live.
+  for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+    PropagateLiveness(CreateRet(&F, i));
+}
+
+/// MarkLive - Mark the given return value or argument as live. Additionally,
+/// mark any values that are used by this value (according to Uses) live as
+/// well.
+void DAE::MarkLive(const RetOrArg &RA) {
+  if (LiveFunctions.count(RA.F))
+    return; // Function was already marked Live.
+
+  if (!LiveValues.insert(RA).second)
+    return; // We were already marked Live.
+
+  DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+  PropagateLiveness(RA);
+}
+
+/// PropagateLiveness - Given that RA is a live value, propagate its liveness
+/// to any other values it uses (according to Uses).
+void DAE::PropagateLiveness(const RetOrArg &RA) {
+  // We don't use upper_bound (or equal_range) here, because our recursive call
+  // to ourselves is likely to cause the upper_bound (which is the first value
+  // not belonging to RA) to become erased and the iterator invalidated.
+  UseMap::iterator Begin = Uses.lower_bound(RA);
+  UseMap::iterator E = Uses.end();
+  UseMap::iterator I;
+  for (I = Begin; I != E && I->first == RA; ++I)
+    MarkLive(I->second);
+
+  // Erase RA from the Uses map (from the lower bound to wherever we ended up
+  // after the loop).
+  Uses.erase(Begin, I);
+}
+
+// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
+// that are not in LiveValues. Transform the function and all of the callers of
+// the function to not have these arguments and return values.
+//
+bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+  // Don't modify fully live functions
+  if (LiveFunctions.count(F))
+    return false;
+
+  // Start by computing a new prototype for the function, which is the same as
+  // the old function, but has fewer arguments and a different return type.
+  FunctionType *FTy = F->getFunctionType();
+  std::vector<Type*> Params;
+
+  // Keep track of if we have a live 'returned' argument
+  bool HasLiveReturnedArg = false;
+
+  // Set up to build a new list of parameter attributes.
+  SmallVector<AttributeSet, 8> AttributesVec;
+  const AttributeSet &PAL = F->getAttributes();
+
+  // Remember which arguments are still alive.
+  SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
+  // Construct the new parameter list from non-dead arguments. Also construct
+  // a new set of parameter attributes to correspond. Skip the first parameter
+  // attribute, since that belongs to the return value.
+  unsigned i = 0;
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I, ++i) {
+    RetOrArg Arg = CreateArg(F, i);
+    if (LiveValues.erase(Arg)) {
+      Params.push_back(I->getType());
+      ArgAlive[i] = true;
+
+      // Get the original parameter attributes (skipping the first one, which
+      // is for the return value).
+      if (PAL.hasAttributes(i + 1)) {
+        AttrBuilder B(PAL, i + 1);
+        if (B.contains(Attribute::Returned))
+          HasLiveReturnedArg = true;
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+      }
+    } else {
+      ++NumArgumentsEliminated;
+      DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
+            << ") from " << F->getName() << "\n");
+    }
+  }
+
+  // Find out the new return value.
+ Type *RetTy = FTy->getReturnType(); + Type *NRetTy = nullptr; + unsigned RetCount = NumRetVals(F); + + // -1 means unused, other numbers are the new index + SmallVector<int, 5> NewRetIdxs(RetCount, -1); + std::vector<Type*> RetTypes; + + // If there is a function with a live 'returned' argument but a dead return + // value, then there are two possible actions: + // 1) Eliminate the return value and take off the 'returned' attribute on the + // argument. + // 2) Retain the 'returned' attribute and treat the return value (but not the + // entire function) as live so that it is not eliminated. + // + // It's not clear in the general case which option is more profitable because, + // even in the absence of explicit uses of the return value, code generation + // is free to use the 'returned' attribute to do things like eliding + // save/restores of registers across calls. Whether or not this happens is + // target and ABI-specific as well as depending on the amount of register + // pressure, so there's no good way for an IR-level pass to figure this out. + // + // Fortunately, the only places where 'returned' is currently generated by + // the FE are places where 'returned' is basically free and almost always a + // performance win, so the second option can just be used always for now. + // + // This should be revisited if 'returned' is ever applied more liberally. + if (RetTy->isVoidTy() || HasLiveReturnedArg) { + NRetTy = RetTy; + } else { + // Look at each of the original return values individually. + for (unsigned i = 0; i != RetCount; ++i) { + RetOrArg Ret = CreateRet(F, i); + if (LiveValues.erase(Ret)) { + RetTypes.push_back(getRetComponentType(F, i)); + NewRetIdxs[i] = RetTypes.size() - 1; + } else { + ++NumRetValsEliminated; + DEBUG(dbgs() << "DAE - Removing return value " << i << " from " + << F->getName() << "\n"); + } + } + if (RetTypes.size() > 1) { + // More than one return type? Reduce it down to size. + if (StructType *STy = dyn_cast<StructType>(RetTy)) { + // Make the new struct packed if we used to return a packed struct + // already. + NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); + } else { + assert(isa<ArrayType>(RetTy) && "unexpected multi-value return"); + NRetTy = ArrayType::get(RetTypes[0], RetTypes.size()); + } + } else if (RetTypes.size() == 1) + // One return type? Just a simple value then, but only if we didn't use to + // return a struct with that simple value before. + NRetTy = RetTypes.front(); + else if (RetTypes.size() == 0) + // No return types? Make it void, but only if we didn't use to return {}. + NRetTy = Type::getVoidTy(F->getContext()); + } + + assert(NRetTy && "No new return type found?"); + + // The existing function return attributes. + AttributeSet RAttrs = PAL.getRetAttributes(); + + // Remove any incompatible attributes, but only if we removed all return + // values. Otherwise, ensure that we don't have any conflicting attributes + // here. Currently, this should not be possible, but special handling might be + // required when new return value attributes are added. + if (NRetTy->isVoidTy()) + RAttrs = RAttrs.removeAttributes(NRetTy->getContext(), + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NRetTy)); + else + assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex). 
+           overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
+           "Return attributes no longer compatible?");
+
+  if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+    AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
+
+  if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+    AttributesVec.push_back(AttributeSet::get(F->getContext(),
+                                              PAL.getFnAttributes()));
+
+  // Reconstruct the AttributesList based on the vector we constructed.
+  AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+
+  // Create the new function type based on the recomputed parameters.
+  FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
+
+  // No change?
+  if (NFTy == FTy)
+    return false;
+
+  // Create the new function body and insert it into the module...
+  Function *NF = Function::Create(NFTy, F->getLinkage());
+  NF->copyAttributesFrom(F);
+  NF->setAttributes(NewPAL);
+  // Insert the new function before the old function, so we won't be processing
+  // it again.
+  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+  NF->takeName(F);
+
+  // Loop over all of the callers of the function, transforming the call sites
+  // to pass in a smaller number of arguments into the new function.
+  //
+  std::vector<Value*> Args;
+  while (!F->use_empty()) {
+    CallSite CS(F->user_back());
+    Instruction *Call = CS.getInstruction();
+
+    AttributesVec.clear();
+    const AttributeSet &CallPAL = CS.getAttributes();
+
+    // The call return attributes.
+    AttributeSet RAttrs = CallPAL.getRetAttributes();
+
+    // Adjust in case the function was changed to return void.
+    RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+                                     AttributeSet::ReturnIndex,
+                        AttributeFuncs::typeIncompatible(NF->getReturnType()));
+    if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+      AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+
+    // Declare these outside of the loops, so we can reuse them for the second
+    // loop, which loops over the varargs.
+    CallSite::arg_iterator I = CS.arg_begin();
+    unsigned i = 0;
+    // Loop over those operands, corresponding to the normal arguments to the
+    // original function, and add those that are still alive.
+    for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
+      if (ArgAlive[i]) {
+        Args.push_back(*I);
+        // Get original parameter attributes, but skip return attributes.
+        if (CallPAL.hasAttributes(i + 1)) {
+          AttrBuilder B(CallPAL, i + 1);
+          // If the return type has changed, then get rid of 'returned' on the
+          // call site. The alternative is to make all 'returned' attributes on
+          // call sites keep the return value alive just like 'returned'
+          // attributes on function declarations, but it's less clearly a win
+          // and this is not an expected case anyway.
+          if (NRetTy != RetTy && B.contains(Attribute::Returned))
+            B.removeAttribute(Attribute::Returned);
+          AttributesVec.
+            push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+        }
+      }
+
+    // Push any varargs arguments on the list. Don't forget their attributes.
+    for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+      Args.push_back(*I);
+      if (CallPAL.hasAttributes(i + 1)) {
+        AttrBuilder B(CallPAL, i + 1);
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+      }
+    }
+
+    if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+      AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+                                                CallPAL.getFnAttributes()));
+
+    // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, "", Call->getParent()); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + cast<InvokeInst>(New)->setAttributes(NewCallPAL); + } else { + New = CallInst::Create(NF, Args, "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + cast<CallInst>(New)->setAttributes(NewCallPAL); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + New->setDebugLoc(Call->getDebugLoc()); + + Args.clear(); + + if (!Call->use_empty()) { + if (New->getType() == Call->getType()) { + // Return type not changed? Just replace users then. + Call->replaceAllUsesWith(New); + New->takeName(Call); + } else if (New->getType()->isVoidTy()) { + // Our return value has uses, but they will get removed later on. + // Replace by null for now. + if (!Call->getType()->isX86_MMXTy()) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + } else { + assert((RetTy->isStructTy() || RetTy->isArrayTy()) && + "Return type changed, but not into a void. The old return type" + " must have been a struct or an array!"); + Instruction *InsertPt = Call; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); + InsertPt = &*NewEdge->getFirstInsertionPt(); + } + + // We used to return a struct or array. Instead of doing smart stuff + // with all the uses, we will just rebuild it using extract/insertvalue + // chaining and let instcombine clean that up. + // + // Start out building up our return value from undef + Value *RetVal = UndefValue::get(RetTy); + for (unsigned i = 0; i != RetCount; ++i) + if (NewRetIdxs[i] != -1) { + Value *V; + if (RetTypes.size() > 1) + // We are still returning a struct, so extract the value from our + // return value + V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", + InsertPt); + else + // We are now returning a single element, so just insert that + V = New; + // Insert the value at the old position + RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); + } + // Now, replace all uses of the old call instruction with the return + // struct we built + Call->replaceAllUsesWith(RetVal); + New->takeName(Call); + } + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. + i = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++i) + if (ArgAlive[i]) { + // If this is a live argument, move the name and users over to the new + // version. + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); + ++I2; + } else { + // If this argument is dead, replace any uses of it with null constants + // (these are guaranteed to become unused later on). 
+        if (!I->getType()->isX86_MMXTy())
+          I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+      }
+
+  // If we change the return value of the function we must rewrite any return
+  // instructions. Check this now.
+  if (F->getReturnType() != NF->getReturnType())
+    for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
+      if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+        Value *RetVal;
+
+        if (NFTy->getReturnType()->isVoidTy()) {
+          RetVal = nullptr;
+        } else {
+          assert(RetTy->isStructTy() || RetTy->isArrayTy());
+          // The original return value was a struct or array, insert
+          // extractvalue/insertvalue chains to extract only the values we need
+          // to return and insert them into our new result.
+          // This does generate messy code, but we'll leave it to instcombine
+          // to clean that up.
+          Value *OldRet = RI->getOperand(0);
+          // Start out building up our return value from undef
+          RetVal = UndefValue::get(NRetTy);
+          for (unsigned i = 0; i != RetCount; ++i)
+            if (NewRetIdxs[i] != -1) {
+              ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
+                                                              "oldret", RI);
+              if (RetTypes.size() > 1) {
+                // We're still returning a struct, so reinsert the value into
+                // our new return value at the new index
+
+                RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
+                                                 "newret", RI);
+              } else {
+                // We are now only returning a simple value, so just return the
+                // extracted value.
+                RetVal = EV;
+              }
+            }
+        }
+        // Replace the return instruction with one returning the new return
+        // value (possibly null if we became void).
+        ReturnInst::Create(F->getContext(), RetVal, RI);
+        BB->getInstList().erase(RI);
+      }
+
+  // Patch the pointer to LLVM function in debug info descriptor.
+  NF->setSubprogram(F->getSubprogram());
+
+  // Now that the old function is dead, delete it.
+  F->eraseFromParent();
+
+  return true;
+}
+
+bool DAE::runOnModule(Module &M) {
+  bool Changed = false;
+
+  // First pass: Do a simple check to see if any functions can have their "..."
+  // removed. We can do this if they never call va_start. This loop cannot be
+  // fused with the next loop, because deleting a function invalidates
+  // information computed while surveying other functions.
+  DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    Function &F = *I++;
+    if (F.getFunctionType()->isVarArg())
+      Changed |= DeleteDeadVarargs(F);
+  }
+
+  // Second pass: loop through the module, determining which arguments are
+  // live. We assume all arguments are dead unless proven otherwise (allowing
+  // us to determine that dead arguments passed into recursive functions are
+  // dead).
+  //
+  DEBUG(dbgs() << "DAE - Determining liveness\n");
+  for (auto &F : M)
+    SurveyFunction(F);
+
+  // Now, remove all dead arguments and return values from each function in
+  // turn.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    // Increment now, because the function will probably get removed (i.e.,
+    // replaced by a new one).
+    Function *F = &*I++;
+    Changed |= RemoveDeadStuffFromFunction(F);
+  }
+
+  // Finally, look for any unused parameters in functions with non-local
+  // linkage and replace the passed-in parameters with undef.
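+  //
+  // For illustration (editorial; hypothetical IR, not from this patch): given
+  //
+  //   define void @f(i32 %unused) { ret void }
+  //   ...
+  //   call void @f(i32 %x)
+  //
+  // the call site becomes "call void @f(i32 undef)", so that %x, and whatever
+  // computes it, may later be found dead.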
+  for (auto &F : M)
+    Changed |= RemoveDeadArgumentsFromCallers(F);
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
new file mode 100644
index 0000000..af313a6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -0,0 +1,84 @@
+//===-- ElimAvailExtern.cpp - Eliminate available external globals --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate available external global
+// definitions from the program, turning them into declarations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "elim-avail-extern"
+
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+struct EliminateAvailableExternally : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  EliminateAvailableExternally() : ModulePass(ID) {
+    initializeEliminateAvailableExternallyPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  // run - Do the EliminateAvailableExternally pass on the specified module.
+  //
+  bool runOnModule(Module &M) override;
+};
+}
+
+char EliminateAvailableExternally::ID = 0;
+INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern",
+                "Eliminate Available Externally Globals", false, false)
+
+ModulePass *llvm::createEliminateAvailableExternallyPass() {
+  return new EliminateAvailableExternally();
+}
+
+bool EliminateAvailableExternally::runOnModule(Module &M) {
+  bool Changed = false;
+
+  // Drop initializers of available externally global variables.
+  for (GlobalVariable &GV : M.globals()) {
+    if (!GV.hasAvailableExternallyLinkage())
+      continue;
+    if (GV.hasInitializer()) {
+      Constant *Init = GV.getInitializer();
+      GV.setInitializer(nullptr);
+      if (isSafeToDestroyConstant(Init))
+        Init->destroyConstant();
+    }
+    GV.removeDeadConstantUsers();
+    GV.setLinkage(GlobalValue::ExternalLinkage);
+    NumVariables++;
+    Changed = true;
+  }
+
+  // Drop the bodies of available externally functions.
+  for (Function &F : M) {
+    if (!F.hasAvailableExternallyLinkage())
+      continue;
+    if (!F.isDeclaration())
+      // This will set the linkage to external.
+      F.deleteBody();
+    F.removeDeadConstantUsers();
+    NumFunctions++;
+    Changed = true;
+  }
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
new file mode 100644
index 0000000..1a3b925
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -0,0 +1,160 @@
+//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass extracts global values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+using namespace llvm;
+
+/// Make sure GV is visible from both modules. Delete is true if it is
+/// being deleted from this module.
+/// This also makes sure GV cannot be dropped so that references from
+/// the split module remain valid.
+static void makeVisible(GlobalValue &GV, bool Delete) {
+  bool Local = GV.hasLocalLinkage();
+  if (Local || Delete) {
+    GV.setLinkage(GlobalValue::ExternalLinkage);
+    if (Local)
+      GV.setVisibility(GlobalValue::HiddenVisibility);
+    return;
+  }
+
+  if (!GV.hasLinkOnceLinkage()) {
+    assert(!GV.isDiscardableIfUnused());
+    return;
+  }
+
+  // Map linkonce* to weak* so that llvm doesn't drop this GV.
+  switch(GV.getLinkage()) {
+  default:
+    llvm_unreachable("Unexpected linkage");
+  case GlobalValue::LinkOnceAnyLinkage:
+    GV.setLinkage(GlobalValue::WeakAnyLinkage);
+    return;
+  case GlobalValue::LinkOnceODRLinkage:
+    GV.setLinkage(GlobalValue::WeakODRLinkage);
+    return;
+  }
+}
+
+namespace {
+  /// @brief A pass to extract specific functions and their dependencies.
+  class GVExtractorPass : public ModulePass {
+    SetVector<GlobalValue *> Named;
+    bool deleteStuff;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+
+    /// GVExtractorPass - If deleteS is true, this pass deletes the specified
+    /// global values. Otherwise, it deletes as much of the module as possible,
+    /// except for the global values specified.
+    ///
+    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+      : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+
+    bool runOnModule(Module &M) override {
+      // Visit the global inline asm.
+      if (!deleteStuff)
+        M.setModuleInlineAsm("");
+
+      // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+      // implementation could figure out which GlobalValues are actually
+      // referenced by the Named set, and which GlobalValues in the rest of
+      // the module are referenced by the NamedSet, and get away with leaving
+      // more internal and private things internal and private. But for now,
+      // be conservative and simple.
+
+      // Visit the GlobalVariables.
+      for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+           I != E; ++I) {
+        bool Delete =
+            deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+        if (!Delete) {
+          if (I->hasAvailableExternallyLinkage())
+            continue;
+          if (I->getName() == "llvm.global_ctors")
+            continue;
+        }
+
+        makeVisible(*I, Delete);
+
+        if (Delete) {
+          // Make this a declaration and drop its comdat.
+          I->setInitializer(nullptr);
+          I->setComdat(nullptr);
+        }
+      }
+
+      // Visit the Functions.
+      for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+        bool Delete =
+            deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+        if (!Delete) {
+          if (I->hasAvailableExternallyLinkage())
+            continue;
+        }
+
+        makeVisible(*I, Delete);
+
+        if (Delete) {
+          // Make this a declaration and drop its comdat.
+          I->deleteBody();
+          I->setComdat(nullptr);
+        }
+      }
+
+      // Visit the Aliases.
+      for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+           I != E;) {
+        Module::alias_iterator CurI = I;
+        ++I;
+
+        bool Delete = deleteStuff == (bool)Named.count(&*CurI);
+        makeVisible(*CurI, Delete);
+
+        if (Delete) {
+          Type *Ty = CurI->getType()->getElementType();
+
+          CurI->removeFromParent();
+          llvm::Value *Declaration;
+          if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+            Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+                                           CurI->getName(), &M);
+
+          } else {
+            Declaration =
+                new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+                                   nullptr, CurI->getName());
+
+          }
+          CurI->replaceAllUsesWith(Declaration);
+          delete &*CurI;
+        }
+      }
+
+      return true;
+    }
+  };
+
+  char GVExtractorPass::ID = 0;
+}
+
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
+                                         bool deleteFn) {
+  return new GVExtractorPass(GVs, deleteFn);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
new file mode 100644
index 0000000..816291d
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -0,0 +1,121 @@
+//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "forceattrs"
+
+static cl::list<std::string>
+    ForceAttributes("force-attribute", cl::Hidden,
+                    cl::desc("Add an attribute to a function. This should be a "
+                             "pair of 'function-name:attribute-name', for "
+                             "example -force-attribute=foo:noinline. This "
+                             "option can be specified multiple times."));
+
+static Attribute::AttrKind parseAttrKind(StringRef Kind) {
+  return StringSwitch<Attribute::AttrKind>(Kind)
+      .Case("alwaysinline", Attribute::AlwaysInline)
+      .Case("builtin", Attribute::Builtin)
+      .Case("cold", Attribute::Cold)
+      .Case("convergent", Attribute::Convergent)
+      .Case("inlinehint", Attribute::InlineHint)
+      .Case("jumptable", Attribute::JumpTable)
+      .Case("minsize", Attribute::MinSize)
+      .Case("naked", Attribute::Naked)
+      .Case("nobuiltin", Attribute::NoBuiltin)
+      .Case("noduplicate", Attribute::NoDuplicate)
+      .Case("noimplicitfloat", Attribute::NoImplicitFloat)
+      .Case("noinline", Attribute::NoInline)
+      .Case("nonlazybind", Attribute::NonLazyBind)
+      .Case("noredzone", Attribute::NoRedZone)
+      .Case("noreturn", Attribute::NoReturn)
+      .Case("norecurse", Attribute::NoRecurse)
+      .Case("nounwind", Attribute::NoUnwind)
+      .Case("optnone", Attribute::OptimizeNone)
+      .Case("optsize", Attribute::OptimizeForSize)
+      .Case("readnone", Attribute::ReadNone)
+      .Case("readonly", Attribute::ReadOnly)
+      .Case("argmemonly", Attribute::ArgMemOnly)
+      .Case("returns_twice", Attribute::ReturnsTwice)
+      .Case("safestack", Attribute::SafeStack)
+      .Case("sanitize_address", Attribute::SanitizeAddress)
+      .Case("sanitize_memory", Attribute::SanitizeMemory)
+      .Case("sanitize_thread", Attribute::SanitizeThread)
+      .Case("ssp", Attribute::StackProtect)
+      .Case("sspreq", Attribute::StackProtectReq)
+      .Case("sspstrong", Attribute::StackProtectStrong)
+      .Case("uwtable", Attribute::UWTable)
+      .Default(Attribute::None);
+}
+
+/// If F has any forced attributes given on the command line, add them.
+static void addForcedAttributes(Function &F) {
+  for (auto &S : ForceAttributes) {
+    auto KV = StringRef(S).split(':');
+    if (KV.first != F.getName())
+      continue;
+
+    auto Kind = parseAttrKind(KV.second);
+    if (Kind == Attribute::None) {
+      DEBUG(dbgs() << "ForcedAttribute: " << KV.second
+                   << " unknown or not handled!\n");
+      continue;
+    }
+    if (F.hasFnAttribute(Kind))
+      continue;
+    F.addFnAttr(Kind);
+  }
+}
+
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+  if (ForceAttributes.empty())
+    return PreservedAnalyses::all();
+
+  for (Function &F : M.functions())
+    addForcedAttributes(F);
+
+  // Just conservatively invalidate analyses; this isn't likely to be
+  // important.
+  return PreservedAnalyses::none();
+}
+
+namespace {
+struct ForceFunctionAttrsLegacyPass : public ModulePass {
+  static char ID; // Pass identification, replacement for typeid
+  ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
+    initializeForceFunctionAttrsLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    if (ForceAttributes.empty())
+      return false;
+
+    for (Function &F : M.functions())
+      addForcedAttributes(F);
+
+    // Conservatively assume we changed something.
+    return true;
+  }
+};
+}
+
+char ForceFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
+                "Force set function attributes", false, false)
+
+Pass *llvm::createForceFunctionAttrsLegacyPass() {
+  return new ForceFunctionAttrsLegacyPass();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
new file mode 100644
index 0000000..6dcfb3f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -0,0 +1,1058 @@
+//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple interprocedural pass which walks the
+// call-graph, looking for functions which do not access or only read
+// non-local memory, and marking them readnone/readonly. It does the
+// same with function arguments independently, marking them readonly/
+// readnone/nocapture. Finally, well-known library call declarations
+// are marked with all attributes that are consistent with the
+// function's standard definition. This pass is implemented as a
+// bottom-up traversal of the call-graph.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "functionattrs"
+
+STATISTIC(NumReadNone, "Number of functions marked readnone");
+STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
+STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
+STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
+STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
+
+namespace {
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
+}
+
+namespace {
+struct FunctionAttrs : public CallGraphSCCPass {
+  static char ID; // Pass identification, replacement for typeid
+  FunctionAttrs() : CallGraphSCCPass(ID) {
+    initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnSCC(CallGraphSCC &SCC) override;
+  bool doInitialization(CallGraph &CG) override {
+    Revisit.clear();
+    return false;
+  }
+  bool doFinalization(CallGraph &CG) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>(); + CallGraphSCCPass::getAnalysisUsage(AU); + } + +private: + TargetLibraryInfo *TLI; + SmallVector<WeakVH,16> Revisit; +}; +} + +char FunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false) + +Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } + +namespace { +/// The three kinds of memory access relevant to 'readonly' and +/// 'readnone' attributes. +enum MemoryAccessKind { + MAK_ReadNone = 0, + MAK_ReadOnly = 1, + MAK_MayWrite = 2 +}; +} + +static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, + const SCCNodeSet &SCCNodes) { + FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); + if (MRB == FMRB_DoesNotAccessMemory) + // Already perfect! + return MAK_ReadNone; + + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F.isDeclaration() || F.mayBeOverridden()) { + if (AliasAnalysis::onlyReadsMemory(MRB)) + return MAK_ReadOnly; + + // Conservatively assume it writes to memory. + return MAK_MayWrite; + } + + // Scan the function body for instructions that may read or write memory. + bool ReadsMemory = false; + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + + // Some instructions can be ignored even if they read or write memory. + // Detect these now, skipping to the next instruction if one is found. + CallSite CS(cast<Value>(I)); + if (CS) { + // Ignore calls to functions in the same SCC. + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + continue; + FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); + + // If the call doesn't access memory, we're done. + if (!(MRB & MRI_ModRef)) + continue; + + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // The call could access any memory. If that includes writes, give up. + if (MRB & MRI_Mod) + return MAK_MayWrite; + // If it reads, note it. + if (MRB & MRI_Ref) + ReadsMemory = true; + continue; + } + + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (!Arg->getType()->isPtrOrPtrVectorTy()) + continue; + + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); + + // Skip accesses to local or constant memory as they don't impact the + // externally visible mod/ref behavior. + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + + if (MRB & MRI_Mod) + // Writes non-local memory. Give up. + return MAK_MayWrite; + if (MRB & MRI_Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + continue; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads from local memory. (Atomic is okay here.) + if (!LI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(LI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Ignore non-volatile stores to local memory. (Atomic is okay here.) 
+      if (!SI->isVolatile()) {
+        MemoryLocation Loc = MemoryLocation::get(SI);
+        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+          continue;
+      }
+    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+      // Ignore vaargs on local memory.
+      MemoryLocation Loc = MemoryLocation::get(VI);
+      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+        continue;
+    }
+
+    // Any remaining instructions need to be taken seriously! Check if they
+    // read or write memory.
+    if (I->mayWriteToMemory())
+      // Writes memory. Just give up.
+      return MAK_MayWrite;
+
+    // If this instruction may read memory, remember that.
+    ReadsMemory |= I->mayReadFromMemory();
+  }
+
+  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+}
+
+/// Deduce readonly/readnone attributes for the SCC.
+template <typename AARGetterT>
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+  // Check if any of the functions in the SCC read or write memory. If they
+  // write memory then they can't be marked readnone or readonly.
+  bool ReadsMemory = false;
+  for (Function *F : SCCNodes) {
+    // Call the callable parameter to look up AA results for this function.
+    AAResults &AAR = AARGetter(*F);
+
+    switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+    case MAK_MayWrite:
+      return false;
+    case MAK_ReadOnly:
+      ReadsMemory = true;
+      break;
+    case MAK_ReadNone:
+      // Nothing to do!
+      break;
+    }
+  }
+
+  // Success! Functions in this SCC do not access memory, or only read memory.
+  // Give them the appropriate attribute.
+  bool MadeChange = false;
+  for (Function *F : SCCNodes) {
+    if (F->doesNotAccessMemory())
+      // Already perfect!
+      continue;
+
+    if (F->onlyReadsMemory() && ReadsMemory)
+      // No change.
+      continue;
+
+    MadeChange = true;
+
+    // Clear out any existing attributes.
+    AttrBuilder B;
+    B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+    F->removeAttributes(
+        AttributeSet::FunctionIndex,
+        AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
+
+    // Add in the new attribute.
+    F->addAttribute(AttributeSet::FunctionIndex,
+                    ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+
+    if (ReadsMemory)
+      ++NumReadOnly;
+    else
+      ++NumReadNone;
+  }
+
+  return MadeChange;
+}
+
+namespace {
+/// For a given pointer Argument, this retains a list of Arguments of functions
+/// in the same SCC that the pointer data flows into. We use this to build an
+/// SCC of the arguments.
+struct ArgumentGraphNode {
+  Argument *Definition;
+  SmallVector<ArgumentGraphNode *, 4> Uses;
+};
+
+class ArgumentGraph {
+  // We store pointers to ArgumentGraphNode objects, so it's important that
+  // they not move around upon insert.
+  typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+
+  ArgumentMapTy ArgumentMap;
+
+  // There is no root node for the argument graph; in fact,
+  //   void f(int *x, int *y) { if (...) f(x, y); }
+  // is an example where the graph is disconnected. The SCCIterator requires a
+  // single entry point, so we maintain a fake ("synthetic") root node that
+  // uses every node. Because the graph is directed and nothing points into
+  // the root, it will not participate in any SCCs (except for its own).
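+  //
+  // Editorial sketch (hypothetical usage, not from this patch): together with
+  // the GraphTraits specializations further below, this synthetic root lets
+  // the argument SCCs be enumerated with LLVM's scc_iterator, roughly:
+  //
+  //   ArgumentGraph AG;
+  //   // ... populate AG via AG[SomeArgument] ...
+  //   for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
+  //     const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
+  //     // process one strongly connected component of arguments
+  //   }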
+ ArgumentGraphNode SyntheticRoot; + +public: + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } + + typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator; + + iterator begin() { return SyntheticRoot.Uses.begin(); } + iterator end() { return SyntheticRoot.Uses.end(); } + ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } + + ArgumentGraphNode *operator[](Argument *A) { + ArgumentGraphNode &Node = ArgumentMap[A]; + Node.Definition = A; + SyntheticRoot.Uses.push_back(&Node); + return &Node; + } +}; + +/// This tracker checks whether callees are in the SCC, and if so it does not +/// consider that a capture, instead adding it to the "Uses" list and +/// continuing with the analysis. +struct ArgumentUsesTracker : public CaptureTracker { + ArgumentUsesTracker(const SCCNodeSet &SCCNodes) + : Captured(false), SCCNodes(SCCNodes) {} + + void tooManyUses() override { Captured = true; } + + bool captured(const Use *U) override { + CallSite CS(U->getUser()); + if (!CS.getInstruction()) { + Captured = true; + return true; + } + + Function *F = CS.getCalledFunction(); + if (!F || F->isDeclaration() || F->mayBeOverridden() || + !SCCNodes.count(F)) { + Captured = true; + return true; + } + + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account for + // these. + + unsigned UseIndex = + std::distance(const_cast<const Use *>(CS.arg_begin()), U); + + assert(UseIndex < CS.data_operands_size() && + "Indirect function calls should have been filtered above!"); + + if (UseIndex >= CS.getNumArgOperands()) { + // Data operand, but not a argument operand -- must be a bundle operand + assert(CS.hasOperandBundles() && "Must be!"); + + // CaptureTracking told us that we're being captured by an operand bundle + // use. In this case it does not matter if the callee is within our SCC + // or not -- we've been captured in some unknown way, and we have to be + // conservative. + Captured = true; + return true; + } + + if (UseIndex >= F->arg_size()) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + Captured = true; + return true; + } + + Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); + return false; + } + + bool Captured; // True only if certainly captured (used outside our SCC). + SmallVector<Argument *, 4> Uses; // Uses within our SCC. + + const SCCNodeSet &SCCNodes; +}; +} + +namespace llvm { +template <> struct GraphTraits<ArgumentGraphNode *> { + typedef ArgumentGraphNode NodeType; + typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; + + static inline NodeType *getEntryNode(NodeType *A) { return A; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->Uses.begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->Uses.end(); + } +}; +template <> +struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { + static NodeType *getEntryNode(ArgumentGraph *AG) { + return AG->getEntryNode(); + } + static ChildIteratorType nodes_begin(ArgumentGraph *AG) { + return AG->begin(); + } + static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } +}; +} + +/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. +static Attribute::AttrKind +determinePointerReadAttrs(Argument *A, + const SmallPtrSet<Argument *, 8> &SCCNodes) { + + SmallVector<Use *, 32> Worklist; + SmallSet<Use *, 32> Visited; + + // inalloca arguments are always clobbered by the call. 
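+  // (An inalloca argument is caller-allocated stack memory that the callee
+  // may freely write; for instance, in the MS ABI an object passed by value
+  // can be constructed and mutated in that slot, so no read-only conclusion
+  // is possible. Sketch only; the check below is the authoritative rule.)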
+ if (A->hasInAllocaAttr()) + return Attribute::None; + + bool IsRead = false; + // We don't need to track IsWritten. If A is written to, return immediately. + + for (Use &U : A->uses()) { + Visited.insert(&U); + Worklist.push_back(&U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + // The original value is not read/written via this if the new value isn't. + for (Use &UU : I->uses()) + if (Visited.insert(&UU).second) + Worklist.push_back(&UU); + break; + + case Instruction::Call: + case Instruction::Invoke: { + bool Captures = true; + + if (I->getType()->isVoidTy()) + Captures = false; + + auto AddUsersToWorklistIfCapturing = [&] { + if (Captures) + for (Use &UU : I->uses()) + if (Visited.insert(&UU).second) + Worklist.push_back(&UU); + }; + + CallSite CS(I); + if (CS.doesNotAccessMemory()) { + AddUsersToWorklistIfCapturing(); + continue; + } + + Function *F = CS.getCalledFunction(); + if (!F) { + if (CS.onlyReadsMemory()) { + IsRead = true; + AddUsersToWorklistIfCapturing(); + continue; + } + return Attribute::None; + } + + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account + // for these. + + unsigned UseIndex = std::distance(CS.arg_begin(), U); + + // U cannot be the callee operand use: since we're exploring the + // transitive uses of an Argument, having such a use be a callee would + // imply the CallSite is an indirect call or invoke; and we'd take the + // early exit above. + assert(UseIndex < CS.data_operands_size() && + "Data operand use expected!"); + + bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); + + if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + return Attribute::None; + } + + Captures &= !CS.doesNotCapture(UseIndex); + + // Since the optimizer (by design) cannot see the data flow corresponding + // to a operand bundle use, these cannot participate in the optimistic SCC + // analysis. Instead, we model the operand bundle uses as arguments in + // call to a function external to the SCC. + if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) || + IsOperandBundleUse) { + + // The accessors used on CallSite here do the right thing for calls and + // invokes with operand bundles. + + if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) + return Attribute::None; + if (!CS.doesNotAccessMemory(UseIndex)) + IsRead = true; + } + + AddUsersToWorklistIfCapturing(); + break; + } + + case Instruction::Load: + IsRead = true; + break; + + case Instruction::ICmp: + case Instruction::Ret: + break; + + default: + return Attribute::None; + } + } + + return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; +} + +/// Deduce nocapture attributes for the SCC. +static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { + bool Changed = false; + + ArgumentGraph AG; + + AttrBuilder B; + B.addAttribute(Attribute::NoCapture); + + // Check each function in turn, determining which pointer arguments are not + // captured. + for (Function *F : SCCNodes) { + // Definitions with weak linkage may be overridden at linktime with + // something that captures pointers, so treat them like declarations. 
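+    // As a reminder, "capture" means a copy of the pointer may outlive the
+    // call. Hedged C sketch (names are illustrative):
+    //   int *G;
+    //   void f(int *P) { G = P; }      // captures: P escapes through G
+    //   int  g(int *P) { return *P; }  // no capture: P is only loaded from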
+ if (F->isDeclaration() || F->mayBeOverridden()) + continue; + + // Functions that are readonly (or readnone) and nounwind and don't return + // a value can't capture arguments. Don't analyze them. + if (F->onlyReadsMemory() && F->doesNotThrow() && + F->getReturnType()->isVoidTy()) { + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { + A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } + } + continue; + } + + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (!A->getType()->isPointerTy()) + continue; + bool HasNonLocalUses = false; + if (!A->hasNoCaptureAttr()) { + ArgumentUsesTracker Tracker(SCCNodes); + PointerMayBeCaptured(&*A, &Tracker); + if (!Tracker.Captured) { + if (Tracker.Uses.empty()) { + // If it's trivially not captured, mark it nocapture now. + A->addAttr( + AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } else { + // If it's not trivially captured and not trivially not captured, + // then it must be calling into another function in our SCC. Save + // its particulars for Argument-SCC analysis later. + ArgumentGraphNode *Node = AG[&*A]; + for (SmallVectorImpl<Argument *>::iterator + UI = Tracker.Uses.begin(), + UE = Tracker.Uses.end(); + UI != UE; ++UI) { + Node->Uses.push_back(AG[*UI]); + if (*UI != A) + HasNonLocalUses = true; + } + } + } + // Otherwise, it's captured. Don't bother doing SCC analysis on it. + } + if (!HasNonLocalUses && !A->onlyReadsMemory()) { + // Can we determine that it's readonly/readnone without doing an SCC? + // Note that we don't allow any calls at all here, or else our result + // will be dependent on the iteration order through the functions in the + // SCC. + SmallPtrSet<Argument *, 8> Self; + Self.insert(&*A); + Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); + if (R != Attribute::None) { + AttrBuilder B; + B.addAttribute(R); + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + Changed = true; + R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + } + } + } + } + + // The graph we've collected is partial because we stopped scanning for + // argument uses once we solved the argument trivially. These partial nodes + // show up as ArgumentGraphNode objects with an empty Uses list, and for + // these nodes the final decision about whether they capture has already been + // made. If the definition doesn't have a 'nocapture' attribute by now, it + // captures. + + for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { + const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; + if (ArgumentSCC.size() == 1) { + if (!ArgumentSCC[0]->Definition) + continue; // synthetic root node + + // eg. "void f(int* x) { if (...) 
f(x); }" + if (ArgumentSCC[0]->Uses.size() == 1 && + ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { + Argument *A = ArgumentSCC[0]->Definition; + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } + continue; + } + + bool SCCCaptured = false; + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { + ArgumentGraphNode *Node = *I; + if (Node->Uses.empty()) { + if (!Node->Definition->hasNoCaptureAttr()) + SCCCaptured = true; + } + } + if (SCCCaptured) + continue; + + SmallPtrSet<Argument *, 8> ArgumentSCCNodes; + // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for + // quickly looking up whether a given Argument is in this ArgumentSCC. + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { + ArgumentSCCNodes.insert((*I)->Definition); + } + + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { + ArgumentGraphNode *N = *I; + for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(), + UE = N->Uses.end(); + UI != UE; ++UI) { + Argument *A = (*UI)->Definition; + if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) + continue; + SCCCaptured = true; + break; + } + } + if (SCCCaptured) + continue; + + for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { + Argument *A = ArgumentSCC[i]->Definition; + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + ++NumNoCapture; + Changed = true; + } + + // We also want to compute readonly/readnone. With a small number of false + // negatives, we can assume that any pointer which is captured isn't going + // to be provably readonly or readnone, since by definition we can't + // analyze all uses of a captured pointer. + // + // The false negatives happen when the pointer is captured by a function + // that promises readonly/readnone behaviour on the pointer, then the + // pointer's lifetime ends before anything that writes to arbitrary memory. + // Also, a readonly/readnone pointer may be returned, but returning a + // pointer is capturing it. + + Attribute::AttrKind ReadAttr = Attribute::ReadNone; + for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { + Argument *A = ArgumentSCC[i]->Definition; + Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); + if (K == Attribute::ReadNone) + continue; + if (K == Attribute::ReadOnly) { + ReadAttr = Attribute::ReadOnly; + continue; + } + ReadAttr = K; + break; + } + + if (ReadAttr != Attribute::None) { + AttrBuilder B, R; + B.addAttribute(ReadAttr); + R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { + Argument *A = ArgumentSCC[i]->Definition; + // Clear out existing readonly/readnone attributes + A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R)); + A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); + ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + Changed = true; + } + } + } + + return Changed; +} + +/// Tests whether a function is "malloc-like". +/// +/// A function is "malloc-like" if it returns either null or a pointer that +/// doesn't alias any other pointer visible to the caller. 
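+///
+/// Hedged C-level sketch (our own illustration, not from this file):
+///   int *f(int n) { return n > 0 ? (int *)malloc(n) : 0; }  // malloc-like
+///   int *g(void)  { static int x; return &x; }  // not: &x is visible across calls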
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { + SmallSetVector<Value *, 8> FlowsToReturn; + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) + if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); + + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i]; + + if (Constant *C = dyn_cast<Constant>(RetVal)) { + if (!C->isNullValue() && !isa<UndefValue>(C)) + return false; + + continue; + } + + if (isa<Argument>(RetVal)) + return false; + + if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (Value *IncValue : PN->incoming_values()) + FlowsToReturn.insert(IncValue); + continue; + } + + // Check whether the pointer came from an allocation. + case Instruction::Alloca: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) + break; + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + break; + } // fall-through + default: + return false; // Did not come from an allocation. + } + + if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) + return false; + } + + return true; +} + +/// Deduce noalias attributes for the SCC. +static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { + // Check each function in turn, determining which functions return noalias + // pointers. + for (Function *F : SCCNodes) { + // Already noalias. + if (F->doesNotAlias(0)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate noalias return values, which are only applicable to + // pointer types. + if (!F->getReturnType()->isPointerTy()) + continue; + + if (!isFunctionMallocLike(F, SCCNodes)) + return false; + } + + bool MadeChange = false; + for (Function *F : SCCNodes) { + if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + continue; + + F->setDoesNotAlias(0); + ++NumNoAlias; + MadeChange = true; + } + + return MadeChange; +} + +/// Tests whether this function is known to not return null. +/// +/// Requires that the function returns a pointer. +/// +/// Returns true if it believes the function will not return a null, and sets +/// \p Speculative based on whether the returned conclusion is a speculative +/// conclusion due to SCC calls. 
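+///
+/// Hedged sketch: for "int *f(int c) { return c ? f(c - 1) : &G; }" (G some
+/// global), the self-call is speculatively treated as nonnull and \p
+/// Speculative is set; the conclusion only sticks if the rest of the SCC
+/// agrees.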
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI, bool &Speculative) { + assert(F->getReturnType()->isPointerTy() && + "nonnull only meaningful on pointer types"); + Speculative = false; + + SmallSetVector<Value *, 8> FlowsToReturn; + for (BasicBlock &BB : *F) + if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); + + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i]; + + // If this value is locally known to be non-null, we're good + if (isKnownNonNull(RetVal, &TLI)) + continue; + + // Otherwise, we need to look upwards since we can't make any local + // conclusions. + Instruction *RVI = dyn_cast<Instruction>(RetVal); + if (!RVI) + return false; + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + FlowsToReturn.insert(PN->getIncomingValue(i)); + continue; + } + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + Function *Callee = CS.getCalledFunction(); + // A call to a node within the SCC is assumed to return null until + // proven otherwise + if (Callee && SCCNodes.count(Callee)) { + Speculative = true; + continue; + } + return false; + } + default: + return false; // Unknown source, may be null + }; + llvm_unreachable("should have either continued or returned"); + } + + return true; +} + +/// Deduce nonnull attributes for the SCC. +static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI) { + // Speculative that all functions in the SCC return only nonnull + // pointers. We may refute this as we analyze functions. + bool SCCReturnsNonNull = true; + + bool MadeChange = false; + + // Check each function in turn, determining which functions return nonnull + // pointers. + for (Function *F : SCCNodes) { + // Already nonnull. + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::NonNull)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate nonnull return values, which are only applicable to + // pointer types. + if (!F->getReturnType()->isPointerTy()) + continue; + + bool Speculative = false; + if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) { + if (!Speculative) { + // Mark the function eagerly since we may discover a function + // which prevents us from speculating about the entire SCC + DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n"); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); + ++NumNonNullReturn; + MadeChange = true; + } + continue; + } + // At least one function returns something which could be null, can't + // speculate any more. 
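+    // (For instance, a hypothetical "int *f(int c) { return c ? &G : 0; }"
+    // in the SCC refutes the speculation for all members, because its null
+    // result can flow through intra-SCC calls.)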
+    SCCReturnsNonNull = false;
+  }
+
+  if (SCCReturnsNonNull) {
+    for (Function *F : SCCNodes) {
+      if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+                                          Attribute::NonNull) ||
+          !F->getReturnType()->isPointerTy())
+        continue;
+
+      DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+      F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+      ++NumNonNullReturn;
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+static bool setDoesNotRecurse(Function &F) {
+  if (F.doesNotRecurse())
+    return false;
+  F.setDoesNotRecurse();
+  ++NumNoRecurse;
+  return true;
+}
+
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+                              SmallVectorImpl<WeakVH> &Revisit) {
+  // Try to identify functions that do not recurse.
+
+  // If the SCC contains multiple nodes, we know for sure there is recursion.
+  if (!SCC.isSingular())
+    return false;
+
+  const CallGraphNode *CGN = *SCC.begin();
+  Function *F = CGN->getFunction();
+  if (!F || F->isDeclaration() || F->doesNotRecurse())
+    return false;
+
+  // If all of the calls in F are identifiable and are to norecurse functions,
+  // F is norecurse. This check also detects self-recursion: F is not currently
+  // marked norecurse, so any call from F to F fails the test below.
+  if (std::all_of(CGN->begin(), CGN->end(),
+                  [](const CallGraphNode::CallRecord &CR) {
+                    Function *F = CR.second->getFunction();
+                    return F && F->doesNotRecurse();
+                  }))
+    // Every callee is known not to recurse, so neither can F.
+    return setDoesNotRecurse(*F);
+
+  // We know that F is not obviously recursive, but we haven't been able to
+  // prove that it doesn't actually recurse. Add it to the Revisit list to try
+  // again top-down later.
+  Revisit.push_back(F);
+  return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+  // If F is internal and all uses are in norecurse functions, then F is also
+  // norecurse.
+  if (F->doesNotRecurse())
+    return false;
+  if (F->hasInternalLinkage()) {
+    for (auto *U : F->users())
+      if (auto *I = dyn_cast<Instruction>(U)) {
+        if (!I->getParent()->getParent()->doesNotRecurse())
+          return false;
+      } else {
+        return false;
+      }
+    return setDoesNotRecurse(*F);
+  }
+  return false;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  bool Changed = false;
+
+  // We compute dedicated AA results for each function in the SCC as needed. We
+  // use a lambda referencing external objects so that they live long enough to
+  // be queried, but we re-use them each time.
+  Optional<BasicAAResult> BAR;
+  Optional<AAResults> AAR;
+  auto AARGetter = [&](Function &F) -> AAResults & {
+    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+    return *AAR;
+  };
+
+  // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+  // whether a given CallGraphNode is in this SCC. Also track whether there are
+  // any external or opt-none nodes that will prevent us from optimizing any
+  // part of the SCC.
+  SCCNodeSet SCCNodes;
+  bool ExternalNode = false;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
+    if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+      // External node or a function we're choosing not to optimize - avoid
+      // both transforming it and leveraging any information it provides.
+ ExternalNode = true; + continue; + } + + SCCNodes.insert(F); + } + + Changed |= addReadAttrs(SCCNodes, AARGetter); + Changed |= addArgumentAttrs(SCCNodes); + + // If we have no external nodes participating in the SCC, we can deduce some + // more precise attributes as well. + if (!ExternalNode) { + Changed |= addNoAliasAttrs(SCCNodes); + Changed |= addNonNullAttrs(SCCNodes, *TLI); + } + + Changed |= addNoRecurseAttrs(SCC, Revisit); + return Changed; +} + +bool FunctionAttrs::doFinalization(CallGraph &CG) { + bool Changed = false; + // When iterating over SCCs we visit functions in a bottom-up fashion. Some of + // the rules we have for identifying norecurse functions work best with a + // top-down walk, so look again at all the functions we previously marked as + // worth revisiting, in top-down order. + for (auto &F : reverse(Revisit)) + if (F) + Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F)); + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp new file mode 100644 index 0000000..d8b677b --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -0,0 +1,433 @@ +//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Function import based on summaries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/FunctionImport.h" + +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/AutoUpgrade.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/SourceMgr.h" + +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "function-import" + +/// Limit on instruction count of imported functions. +static cl::opt<unsigned> ImportInstrLimit( + "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"), + cl::desc("Only import functions with less than N instructions")); + +// Load lazily a module from \p FileName in \p Context. +static std::unique_ptr<Module> loadFile(const std::string &FileName, + LLVMContext &Context) { + SMDiagnostic Err; + DEBUG(dbgs() << "Loading '" << FileName << "'\n"); + std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context); + if (!Result) { + Err.print("function-import", errs()); + return nullptr; + } + + Result->materializeMetadata(); + UpgradeDebugInfo(*Result); + + return Result; +} + +namespace { +/// Helper to load on demand a Module from file and cache it for subsequent +/// queries. It can be used with the FunctionImporter. +class ModuleLazyLoaderCache { + /// Cache of lazily loaded module for import. + StringMap<std::unique_ptr<Module>> ModuleMap; + + /// Retrieve a Module from the cache or lazily load it on demand. + std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule; + +public: + /// Create the loader, Module will be initialized in \p Context. 
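+  ///
+  /// Usage sketch (Ctx and the file name are hypothetical):
+  ///   ModuleLazyLoaderCache Loader(
+  ///       [&](StringRef File) { return loadFile(File, Ctx); });
+  ///   Module &M = Loader("foo.o");  // parsed lazily once, cached afterwards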
+ ModuleLazyLoaderCache(std::function< + std::unique_ptr<Module>(StringRef FileName)> createLazyModule) + : createLazyModule(createLazyModule) {} + + /// Retrieve a Module from the cache or lazily load it on demand. + Module &operator()(StringRef FileName); + + std::unique_ptr<Module> takeModule(StringRef FileName) { + auto I = ModuleMap.find(FileName); + assert(I != ModuleMap.end()); + std::unique_ptr<Module> Ret = std::move(I->second); + ModuleMap.erase(I); + return Ret; + } +}; + +// Get a Module for \p FileName from the cache, or load it lazily. +Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) { + auto &Module = ModuleMap[Identifier]; + if (!Module) + Module = createLazyModule(Identifier); + return *Module; +} +} // anonymous namespace + +/// Walk through the instructions in \p F looking for external +/// calls not already in the \p CalledFunctions set. If any are +/// found they are added to the \p Worklist for importing. +static void findExternalCalls(const Module &DestModule, Function &F, + const FunctionInfoIndex &Index, + StringSet<> &CalledFunctions, + SmallVector<StringRef, 64> &Worklist) { + // We need to suffix internal function calls imported from other modules, + // prepare the suffix ahead of time. + std::string Suffix; + if (F.getParent() != &DestModule) + Suffix = + (Twine(".llvm.") + + Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str(); + + for (auto &BB : F) { + for (auto &I : BB) { + if (isa<CallInst>(I)) { + auto CalledFunction = cast<CallInst>(I).getCalledFunction(); + // Insert any new external calls that have not already been + // added to set/worklist. + if (!CalledFunction || !CalledFunction->hasName()) + continue; + // Ignore intrinsics early + if (CalledFunction->isIntrinsic()) { + assert(CalledFunction->getIntrinsicID() != 0); + continue; + } + auto ImportedName = CalledFunction->getName(); + auto Renamed = (ImportedName + Suffix).str(); + // Rename internal functions + if (CalledFunction->hasInternalLinkage()) { + ImportedName = Renamed; + } + auto It = CalledFunctions.insert(ImportedName); + if (!It.second) { + // This is a call to a function we already considered, skip. + continue; + } + // Ignore functions already present in the destination module + auto *SrcGV = DestModule.getNamedValue(ImportedName); + if (SrcGV) { + assert(isa<Function>(SrcGV) && "Name collision during import"); + if (!cast<Function>(SrcGV)->isDeclaration()) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " + << ImportedName << " already in DestinationModule\n"); + continue; + } + } + + Worklist.push_back(It.first->getKey()); + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Adding callee for : " << ImportedName << " : " + << F.getName() << "\n"); + } + } + } +} + +// Helper function: given a worklist and an index, will process all the worklist +// and decide what to import based on the summary information. +// +// Nothing is actually imported, functions are materialized in their source +// module and analyzed there. +// +// \p ModuleToFunctionsToImportMap is filled with the set of Function to import +// per Module. 
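+//
+// Note on naming (values are illustrative): an internal callee "bar" imported
+// from a module with id 42 is tracked here as "bar.llvm.42", matching the
+// Suffix renaming performed in findExternalCalls above.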
+static void GetImportList(Module &DestModule,
+                          SmallVector<StringRef, 64> &Worklist,
+                          StringSet<> &CalledFunctions,
+                          std::map<StringRef, DenseSet<const GlobalValue *>>
+                              &ModuleToFunctionsToImportMap,
+                          const FunctionInfoIndex &Index,
+                          ModuleLazyLoaderCache &ModuleLoaderCache) {
+  while (!Worklist.empty()) {
+    auto CalledFunctionName = Worklist.pop_back_val();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+                 << CalledFunctionName << "\n");
+
+    // Try to get a summary for this function call.
+    auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+    if (InfoList == Index.end()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+                   << CalledFunctionName << ". Ignoring.\n");
+      continue;
+    }
+    assert(!InfoList->second.empty() && "No summary, error at import?");
+
+    // Comdat can have multiple entries, FIXME: what do we do with them?
+    auto &Info = InfoList->second[0];
+    assert(Info && "Nullptr in list, error importing summaries?\n");
+
+    auto *Summary = Info->functionSummary();
+    if (!Summary) {
+      // FIXME: in case we are lazyloading summaries, we can do it now.
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Missing summary for " << CalledFunctionName
+                   << ", error at import?\n");
+      llvm_unreachable("Missing summary");
+    }
+
+    if (Summary->instCount() > ImportInstrLimit) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+                   << CalledFunctionName << " with " << Summary->instCount()
+                   << " instructions (limit " << ImportInstrLimit << ")\n");
+      continue;
+    }
+
+    // Get the module path from the summary.
+    auto ModuleIdentifier = Summary->modulePath();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+                 << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+    auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+    // The function that we will import!
+    GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+    if (!SGV) {
+      // The destination module refers to a function that was originally local
+      // to the source module by its renamed (suffixed) name. Our copy of the
+      // source module may not have been renamed, so strip the suffix that was
+      // added during renaming to recover the original name in the source
+      // module.
+      std::pair<StringRef, StringRef> Split =
+          CalledFunctionName.split(".llvm.");
+      SGV = SrcModule.getNamedValue(Split.first);
+      assert(SGV && "Can't find function to import in source module");
+    }
+    if (!SGV) {
+      report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+                         "' in Module '" + SrcModule.getModuleIdentifier() +
+                         "', error in the summary?\n");
+    }
+
+    Function *F = dyn_cast<Function>(SGV);
+    if (!F && isa<GlobalAlias>(SGV)) {
+      auto *SGA = dyn_cast<GlobalAlias>(SGV);
+      F = dyn_cast<Function>(SGA->getBaseObject());
+      CalledFunctionName = F->getName();
+    }
+    assert(F && "Imported Function is ... not a Function");
+
+    // We cannot import weak_any functions/aliases without possibly affecting
+    // the order they are seen and selected by the linker, changing program
+    // semantics.
+    if (SGV->hasWeakAnyLinkage()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Ignoring import request for weak-any "
+                   << (isa<Function>(SGV) ?
"function " : "alias ") + << CalledFunctionName << " from " + << SrcModule.getModuleIdentifier() << "\n"); + continue; + } + + // Add the function to the import list + auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()]; + Entry.insert(F); + + // Process the newly imported functions and add callees to the worklist. + F->materialize(); + findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist); + } +} + +// Automatically import functions in Module \p DestModule based on the summaries +// index. +// +// The current implementation imports every called functions that exists in the +// summaries index. +bool FunctionImporter::importFunctions(Module &DestModule) { + DEBUG(dbgs() << "Starting import for Module " + << DestModule.getModuleIdentifier() << "\n"); + unsigned ImportedCount = 0; + + /// First step is collecting the called external functions. + StringSet<> CalledFunctions; + SmallVector<StringRef, 64> Worklist; + for (auto &F : DestModule) { + if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone)) + continue; + findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist); + } + if (Worklist.empty()) + return false; + + /// Second step: for every call to an external function, try to import it. + + // Linker that will be used for importing function + Linker TheLinker(DestModule); + + // Map of Module -> List of Function to import from the Module + std::map<StringRef, DenseSet<const GlobalValue *>> + ModuleToFunctionsToImportMap; + + // Analyze the summaries and get the list of functions to import by + // populating ModuleToFunctionsToImportMap + ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); + GetImportList(DestModule, Worklist, CalledFunctions, + ModuleToFunctionsToImportMap, Index, ModuleLoaderCache); + assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList"); + + StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>> + ModuleToTempMDValsMap; + + // Do the actual import of functions now, one Module at a time + for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) { + // Get the module for the import + auto &FunctionsToImport = FunctionsToImportPerModule.second; + std::unique_ptr<Module> SrcModule = + ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first); + assert(&DestModule.getContext() == &SrcModule->getContext() && + "Context mismatch"); + + // Save the mapping of value ids to temporary metadata created when + // importing this function. If we have already imported from this module, + // add new temporary metadata to the existing mapping. + auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()]; + if (!TempMDVals) + TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>(); + + // Link in the specified functions. + if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None, + &Index, &FunctionsToImport, TempMDVals.get())) + report_fatal_error("Function Import: link error"); + + ImportedCount += FunctionsToImport.size(); + } + + // Now link in metadata for all modules from which we imported functions. + for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME : + ModuleToTempMDValsMap) { + // Load the specified source module. + auto &SrcModule = ModuleLoaderCache(SME.getKey()); + + // Link in all necessary metadata from this module. 
+ if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) + return false; + } + + DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module " + << DestModule.getModuleIdentifier() << "\n"); + return ImportedCount; +} + +/// Summary file to use for function importing when using -function-import from +/// the command line. +static cl::opt<std::string> + SummaryFile("summary-file", + cl::desc("The summary file to use for function importing.")); + +static void diagnosticHandler(const DiagnosticInfo &DI) { + raw_ostream &OS = errs(); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + OS << '\n'; +} + +/// Parse the function index out of an IR file and return the function +/// index object if found, or nullptr if not. +static std::unique_ptr<FunctionInfoIndex> +getFunctionIndexForFile(StringRef Path, std::string &Error, + DiagnosticHandlerFunction DiagnosticHandler) { + std::unique_ptr<MemoryBuffer> Buffer; + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(Path); + if (std::error_code EC = BufferOrErr.getError()) { + Error = EC.message(); + return nullptr; + } + Buffer = std::move(BufferOrErr.get()); + ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr = + object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(), + DiagnosticHandler); + if (std::error_code EC = ObjOrErr.getError()) { + Error = EC.message(); + return nullptr; + } + return (*ObjOrErr)->takeIndex(); +} + +namespace { +/// Pass that performs cross-module function import provided a summary file. +class FunctionImportPass : public ModulePass { + /// Optional function summary index to use for importing, otherwise + /// the summary-file option must be specified. + const FunctionInfoIndex *Index; + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + const char *getPassName() const override { + return "Function Importing"; + } + + explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr) + : ModulePass(ID), Index(Index) {} + + bool runOnModule(Module &M) override { + if (SummaryFile.empty() && !Index) + report_fatal_error("error: -function-import requires -summary-file or " + "file from frontend\n"); + std::unique_ptr<FunctionInfoIndex> IndexPtr; + if (!SummaryFile.empty()) { + if (Index) + report_fatal_error("error: -summary-file and index from frontend\n"); + std::string Error; + IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); + if (!IndexPtr) { + errs() << "Error loading file '" << SummaryFile << "': " << Error + << "\n"; + return false; + } + Index = IndexPtr.get(); + } + + // Perform the import now. 
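+    // Hedged driver sketch using the names registered in this file (file
+    // names are placeholders):
+    //   opt -function-import -summary-file=index.thinlto.bc dest.bc -o out.bc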
+    auto ModuleLoader = [&M](StringRef Identifier) {
+      return loadFile(Identifier, M.getContext());
+    };
+    FunctionImporter Importer(*Index, ModuleLoader);
+    return Importer.importFunctions(M);
+  }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+                      "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+                    "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+  return new FunctionImportPass(Index);
+}
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
new file mode 100644
index 0000000..9b276ed
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -0,0 +1,256 @@
+//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform is designed to eliminate unreachable internal globals from the
+// program. It uses an aggressive algorithm, searching out globals that are
+// known to be alive. After it finds all of the globals which are needed, it
+// deletes whatever is left over. This allows it to delete recursive chunks of
+// the program which are unreachable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Pass.h"
+#include <unordered_map>
+using namespace llvm;
+
+#define DEBUG_TYPE "globaldce"
+
+STATISTIC(NumAliases  , "Number of global aliases removed");
+STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumVariables, "Number of global variables removed");
+
+namespace {
+  struct GlobalDCE : public ModulePass {
+    static char ID; // Pass identification, replacement for typeid
+    GlobalDCE() : ModulePass(ID) {
+      initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+    }
+
+    // run - Do the GlobalDCE pass on the specified module, optionally updating
+    // the specified callgraph to reflect the changes.
+    //
+    bool runOnModule(Module &M) override;
+
+  private:
+    SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+    SmallPtrSet<Constant *, 8> SeenConstants;
+    std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+
+    /// GlobalIsNeeded - Mark the specific global value as needed, and
+    /// recursively mark anything that it uses as also needed.
+    void GlobalIsNeeded(GlobalValue *GV);
+    void MarkUsedGlobalsAsNeeded(Constant *C);
+
+    bool RemoveUnusedGlobalValue(GlobalValue &GV);
+  };
+}
+
+/// Returns true if F contains only a single "ret" instruction.
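+///
+/// (e.g. the IR for "void f() {}" is a single "ret void"; such ctors are
+/// pruned from llvm.global_ctors via the optimizeGlobalCtorsList call below.)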
+static bool isEmptyFunction(Function *F) { + BasicBlock &Entry = F->getEntryBlock(); + if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front())) + return false; + ReturnInst &RI = cast<ReturnInst>(Entry.front()); + return RI.getReturnValue() == nullptr; +} + +char GlobalDCE::ID = 0; +INITIALIZE_PASS(GlobalDCE, "globaldce", + "Dead Global Elimination", false, false) + +ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } + +bool GlobalDCE::runOnModule(Module &M) { + bool Changed = false; + + // Remove empty functions from the global ctors list. + Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + + // Collect the set of members for each comdat. + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); + + // Loop over the module, adding globals which are obviously necessary. + for (Function &F : M) { + Changed |= RemoveUnusedGlobalValue(F); + // Functions with external linkage are needed if they have a body + if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage()) + if (!F.isDiscardableIfUnused()) + GlobalIsNeeded(&F); + } + + for (GlobalVariable &GV : M.globals()) { + Changed |= RemoveUnusedGlobalValue(GV); + // Externally visible & appending globals are needed, if they have an + // initializer. + if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage()) + if (!GV.isDiscardableIfUnused()) + GlobalIsNeeded(&GV); + } + + for (GlobalAlias &GA : M.aliases()) { + Changed |= RemoveUnusedGlobalValue(GA); + // Externally visible aliases are needed. + if (!GA.isDiscardableIfUnused()) + GlobalIsNeeded(&GA); + } + + // Now that all globals which are needed are in the AliveGlobals set, we loop + // through the program, deleting those which are not alive. + // + + // The first pass is to drop initializers of global variables which are dead. + std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals + for (GlobalVariable &GV : M.globals()) + if (!AliveGlobals.count(&GV)) { + DeadGlobalVars.push_back(&GV); // Keep track of dead globals + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); + if (isSafeToDestroyConstant(Init)) + Init->destroyConstant(); + } + } + + // The second pass drops the bodies of functions which are dead... + std::vector<Function *> DeadFunctions; + for (Function &F : M) + if (!AliveGlobals.count(&F)) { + DeadFunctions.push_back(&F); // Keep track of dead globals + if (!F.isDeclaration()) + F.deleteBody(); + } + + // The third pass drops targets of aliases which are dead... + std::vector<GlobalAlias*> DeadAliases; + for (GlobalAlias &GA : M.aliases()) + if (!AliveGlobals.count(&GA)) { + DeadAliases.push_back(&GA); + GA.setAliasee(nullptr); + } + + if (!DeadFunctions.empty()) { + // Now that all interferences have been dropped, delete the actual objects + // themselves. + for (Function *F : DeadFunctions) { + RemoveUnusedGlobalValue(*F); + M.getFunctionList().erase(F); + } + NumFunctions += DeadFunctions.size(); + Changed = true; + } + + if (!DeadGlobalVars.empty()) { + for (GlobalVariable *GV : DeadGlobalVars) { + RemoveUnusedGlobalValue(*GV); + M.getGlobalList().erase(GV); + } + NumVariables += DeadGlobalVars.size(); + Changed = true; + } + + // Now delete any dead aliases. 
+  if (!DeadAliases.empty()) {
+    for (GlobalAlias *GA : DeadAliases) {
+      RemoveUnusedGlobalValue(*GA);
+      M.getAliasList().erase(GA);
+    }
+    NumAliases += DeadAliases.size();
+    Changed = true;
+  }
+
+  // Make sure that all memory is released
+  AliveGlobals.clear();
+  SeenConstants.clear();
+  ComdatMembers.clear();
+
+  return Changed;
+}
+
+/// GlobalIsNeeded - Mark the specific global value as needed, and
+/// recursively mark anything that it uses as also needed.
+void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+  // If the global is already in the set, no need to reprocess it.
+  if (!AliveGlobals.insert(G).second)
+    return;
+
+  if (Comdat *C = G->getComdat()) {
+    for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+      GlobalIsNeeded(CM.second);
+  }
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
+    // If this is a global variable, we must make sure to add any global values
+    // referenced by the initializer to the alive set.
+    if (GV->hasInitializer())
+      MarkUsedGlobalsAsNeeded(GV->getInitializer());
+  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
+    // The target of a global alias is needed.
+    MarkUsedGlobalsAsNeeded(GA->getAliasee());
+  } else {
+    // Otherwise this must be a function object. We have to scan the body of
+    // the function looking for constants and global values which are used as
+    // operands. Any operands of these types must be processed to ensure that
+    // any globals used will be marked as needed.
+    Function *F = cast<Function>(G);
+
+    for (Use &U : F->operands())
+      MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
+
+    for (BasicBlock &BB : *F)
+      for (Instruction &I : BB)
+        for (Use &U : I.operands())
+          if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
+            GlobalIsNeeded(GV);
+          else if (Constant *C = dyn_cast<Constant>(U))
+            MarkUsedGlobalsAsNeeded(C);
+  }
+}
+
+void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return GlobalIsNeeded(GV);
+
+  // Loop over all of the operands of the constant, adding any globals they
+  // use to the list of needed globals.
+  for (Use &U : C->operands()) {
+    // If we've already processed this constant there's no need to do it again.
+    Constant *Op = dyn_cast<Constant>(U);
+    if (Op && SeenConstants.insert(Op).second)
+      MarkUsedGlobalsAsNeeded(Op);
+  }
+}
+
+// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
+// GlobalValue, looking for the constant pointer ref that may be pointing to it.
+// If found, check to see if the constant pointer ref is safe to destroy, and if
+// so, nuke it. This will reduce the reference count on the global value, which
+// might make it deader.
+//
+bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+  if (GV.use_empty())
+    return false;
+  GV.removeDeadConstantUsers();
+  return GV.use_empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
new file mode 100644
index 0000000..fd77369
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -0,0 +1,3234 @@
+//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms simple global variables that never have their address
+// taken.
If obviously true, it marks read/write globals as constant, deletes +// variables only stored to, etc. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include <algorithm> +#include <deque> +using namespace llvm; + +#define DEBUG_TYPE "globalopt" + +STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); +STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); +STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); +STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); +STATISTIC(NumDeleted , "Number of globals deleted"); +STATISTIC(NumGlobUses , "Number of global uses devirtualized"); +STATISTIC(NumLocalized , "Number of globals localized"); +STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); +STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); +STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); +STATISTIC(NumNestRemoved , "Number of nest attributes removed"); +STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); +STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); + +namespace { + struct GlobalOpt : public ModulePass { + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + } + static char ID; // Pass identification, replacement for typeid + GlobalOpt() : ModulePass(ID) { + initializeGlobalOptPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + private: + bool OptimizeFunctions(Module &M); + bool OptimizeGlobalVars(Module &M); + bool OptimizeGlobalAliases(Module &M); + bool deleteIfDead(GlobalValue &GV); + bool processGlobal(GlobalValue &GV); + bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + + bool isPointerValueDeadOnEntryToFunction(const Function *F, + GlobalValue *GV); + + TargetLibraryInfo *TLI; + SmallSet<const Comdat *, 8> NotDiscardableComdats; + }; +} + +char GlobalOpt::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
+                    "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+
+/// Is this global variable possibly used by a leak checker as a root? If so,
+/// we might not really want to eliminate the stores to it.
+static bool isLeakCheckerRoot(GlobalVariable *GV) {
+  // A global variable is a root if it is a pointer, or could plausibly contain
+  // a pointer. There are two challenges; one is that we could have a struct
+  // that has an inner member which is a pointer. We recurse through the type
+  // to detect these (up to a point). The other is that we may actually be a
+  // union of a pointer and another type, and so our LLVM type is an integer
+  // which gets converted into a pointer, or our type is an [i8 x #] with a
+  // pointer potentially contained here.
+
+  if (GV->hasPrivateLinkage())
+    return false;
+
+  SmallVector<Type *, 4> Types;
+  Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+
+  unsigned Limit = 20;
+  do {
+    Type *Ty = Types.pop_back_val();
+    switch (Ty->getTypeID()) {
+    default: break;
+    case Type::PointerTyID: return true;
+    case Type::ArrayTyID:
+    case Type::VectorTyID: {
+      SequentialType *STy = cast<SequentialType>(Ty);
+      Types.push_back(STy->getElementType());
+      break;
+    }
+    case Type::StructTyID: {
+      StructType *STy = cast<StructType>(Ty);
+      if (STy->isOpaque()) return true;
+      for (StructType::element_iterator I = STy->element_begin(),
+           E = STy->element_end(); I != E; ++I) {
+        Type *InnerTy = *I;
+        if (isa<PointerType>(InnerTy)) return true;
+        if (isa<CompositeType>(InnerTy))
+          Types.push_back(InnerTy);
+      }
+      break;
+    }
+    }
+    if (--Limit == 0) return true;
+  } while (!Types.empty());
+  return false;
+}
+
+/// Given a value that is stored to a global but never read, determine whether
+/// it's safe to remove the store and the chain of computation that feeds the
+/// store.
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
+  do {
+    if (isa<Constant>(V))
+      return true;
+    if (!V->hasOneUse())
+      return false;
+    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+        isa<GlobalValue>(V))
+      return false;
+    if (isAllocationFn(V, TLI))
+      return true;
+
+    Instruction *I = cast<Instruction>(V);
+    if (I->mayHaveSideEffects())
+      return false;
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      if (!GEP->hasAllConstantIndices())
+        return false;
+    } else if (I->getNumOperands() != 1) {
+      return false;
+    }
+
+    V = I->getOperand(0);
+  } while (1);
+}
+
+/// This GV is a pointer root. Loop over all users of the global and clean up
+/// any that obviously don't assign the global a value that isn't dynamically
+/// allocated.
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+                                    const TargetLibraryInfo *TLI) {
+  // A brief explanation of leak checkers. The goal is to find bugs where
+  // pointers are forgotten, causing an accumulating growth in memory
+  // usage over time. The common strategy for leak checkers is to whitelist the
+  // memory pointed to by globals at exit. This is popular because it also
+  // solves another problem where the main thread of a C++ program may shut down
+  // before other threads that are still expecting to use those globals. To
+  // handle that case, we expect the program may create a singleton and never
+  // destroy it.
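+  //
+  // Hedged sketch: given "static int *G; void set(void) { G = malloc(4); }"
+  // with G never read, deleting just the store would make the allocation look
+  // leaked, so the code below removes the allocation chain together with the
+  // store (the Dead pairs).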
+ + bool Changed = false; + + // If Dead[n].first is the only use of a malloc result, we can delete its + // chain of computation and the store to the global in Dead[n].second. + SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; + + // Constants can't be pointers to dynamically allocated memory. + for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); + UI != E;) { + User *U = *UI++; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + Value *V = SI->getValueOperand(); + if (isa<Constant>(V)) { + Changed = true; + SI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(V)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, SI)); + } + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { + if (isa<Constant>(MSI->getValue())) { + Changed = true; + MSI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MSI)); + } + } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { + GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); + if (MemSrc && MemSrc->isConstant()) { + Changed = true; + MTI->eraseFromParent(); + } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MTI)); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (Constant *C = dyn_cast<Constant>(U)) { + if (isSafeToDestroyConstant(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + Dead.clear(); + CleanupPointerRootUsers(GV, TLI); + return true; + } + } + } + + for (int i = 0, e = Dead.size(); i != e; ++i) { + if (IsSafeComputationToRemove(Dead[i].first, TLI)) { + Dead[i].second->eraseFromParent(); + Instruction *I = Dead[i].first; + do { + if (isAllocationFn(I, TLI)) + break; + Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); + if (!J) + break; + I->eraseFromParent(); + I = J; + } while (1); + I->eraseFromParent(); + } + } + + return Changed; +} + +/// We just marked GV constant. Loop over all users of the global, cleaning up +/// the obvious ones. This is largely just a quick scan over the use list to +/// clean up the easy and obvious cruft. This returns true if it made a change. +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, + const DataLayout &DL, + TargetLibraryInfo *TLI) { + bool Changed = false; + // Note that we need to use a weak value handle for the worklist items. When + // we delete a constant array, we may also be holding pointer to one of its + // elements (or an element of one of its elements if we're dealing with an + // array of arrays) in the worklist. + SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); + while (!WorkList.empty()) { + Value *UV = WorkList.pop_back_val(); + if (!UV) + continue; + + User *U = cast<User>(UV); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (Init) { + // Replace the load with the initializer. + LI->replaceAllUsesWith(Init); + LI->eraseFromParent(); + Changed = true; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // Store must be unreachable or storing Init into the global. 
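+      // (The global has just been proven constant, so a surviving store can
+      // only rewrite the value already present or sit on a dynamically
+      // unreachable path; deleting it is safe either way.)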
+ SI->eraseFromParent(); + Changed = true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Constant *SubInit = nullptr; + if (Init) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); + } else if ((CE->getOpcode() == Instruction::BitCast && + CE->getType()->isPointerTy()) || + CE->getOpcode() == Instruction::AddrSpaceCast) { + // Pointer cast, delete any stores and memsets to the global. + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); + } + + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // Do not transform "gepinst (gep constexpr (GV))" here, because forming + // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold + // and will invalidate our notion of what Init is. + Constant *SubInit = nullptr; + if (!isa<ConstantExpr>(GEP->getOperand(0))) { + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( + ConstantFoldInstruction(GEP, DL, TLI)); + if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + + // If the initializer is an all-null value and we have an inbounds GEP, + // we already know what the result of any load from that GEP is. + // TODO: Handle splats. + if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) + SubInit = Constant::getNullValue(GEP->getType()->getElementType()); + } + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); + + if (GEP->use_empty()) { + GEP->eraseFromParent(); + Changed = true; + } + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv + if (MI->getRawDest() == V) { + MI->eraseFromParent(); + Changed = true; + } + + } else if (Constant *C = dyn_cast<Constant>(U)) { + // If we have a chain of dead constantexprs or other things dangling from + // us, and if they are all dead, nuke them without remorse. + if (isSafeToDestroyConstant(C)) { + C->destroyConstant(); + CleanupConstantGlobalUsers(V, Init, DL, TLI); + return true; + } + } + } + return Changed; +} + +/// Return true if the specified instruction is a safe user of a derived +/// expression from a global that we want to SROA. +static bool isSafeSROAElementUse(Value *V) { + // We might have a dead and dangling constant hanging off of here. + if (Constant *C = dyn_cast<Constant>(V)) + return isSafeToDestroyConstant(C); + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // Loads are ok. + if (isa<LoadInst>(I)) return true; + + // Stores *to* the pointer are ok. + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getOperand(0) != V; + + // Otherwise, it must be a GEP. + GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); + if (!GEPI) return false; + + if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || + !cast<Constant>(GEPI->getOperand(1))->isNullValue()) + return false; + + for (User *U : GEPI->users()) + if (!isSafeSROAElementUse(U)) + return false; + return true; +} + + +/// U is a direct user of the specified global value. Look at it and its uses +/// and decide whether it is safe to SROA this global. +static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { + // The user of the global must be a GEP Inst or a ConstantExpr GEP. 
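+  // For instance (hypothetical IR): with '@G = internal global {i32, [4 x i8]}',
+  // a use like 'gep @G, 0, 1, 2' is a candidate, while any use that lets @G
+  // itself escape (a call, a store of @G, ...) is not.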
+ if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || + cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) + return false; + + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || + !cast<Constant>(U->getOperand(1))->isNullValue() || + !isa<ConstantInt>(U->getOperand(2))) + return false; + + gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); + ++GEPI; // Skip over the pointer index. + + // If this is a use of an array allocation, do a bit more checking for sanity. + if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { + uint64_t NumElements = AT->getNumElements(); + ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); + + // Check to make sure that index falls within the array. If not, + // something funny is going on, so we won't do the optimization. + // + if (Idx->getZExtValue() >= NumElements) + return false; + + // We cannot scalar repl this level of the array unless any array + // sub-indices are in-range constants. In particular, consider: + // A[0][i]. We cannot know that the user isn't doing invalid things like + // allowing i to index an out-of-range subscript that accesses A[1]. + // + // Scalar replacing *just* the outer index of the array is probably not + // going to be a win anyway, so just give up. + for (++GEPI; // Skip array index. + GEPI != E; + ++GEPI) { + uint64_t NumElements; + if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) + NumElements = SubArrayTy->getNumElements(); + else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) + NumElements = SubVectorTy->getNumElements(); + else { + assert((*GEPI)->isStructTy() && + "Indexed GEP type is not array, vector, or struct!"); + continue; + } + + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); + if (!IdxVal || IdxVal->getZExtValue() >= NumElements) + return false; + } + } + + for (User *UU : U->users()) + if (!isSafeSROAElementUse(UU)) + return false; + + return true; +} + +/// Look at all uses of the global and decide whether it is safe for us to +/// perform this transformation. +static bool GlobalUsersSafeToSRA(GlobalValue *GV) { + for (User *U : GV->users()) + if (!IsUserOfGlobalSafeForSRA(U, GV)) + return false; + + return true; +} + + +/// Perform scalar replacement of aggregates on the specified global variable. +/// This opens the door for other optimizations by exposing the behavior of the +/// program in a more fine-grained way. We have determined that this +/// transformation is safe already. We return the first global variable we +/// insert so that the caller can reprocess it. +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { + // Make sure this global only has simple uses that we can SRA. + if (!GlobalUsersSafeToSRA(GV)) + return nullptr; + + assert(GV->hasLocalLinkage() && !GV->isConstant()); + Constant *Init = GV->getInitializer(); + Type *Ty = Init->getType(); + + std::vector<GlobalVariable*> NewGlobals; + Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + + // Get the alignment of the global, either explicit or target-specific. 
+ unsigned StartAlignment = GV->getAlignment(); + if (StartAlignment == 0) + StartAlignment = DL.getABITypeAlignment(GV->getType()); + + if (StructType *STy = dyn_cast<StructType>(Ty)) { + NewGlobals.reserve(STy->getNumElements()); + const StructLayout &Layout = *DL.getStructLayout(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Constant *In = Init->getAggregateElement(i); + assert(In && "Couldn't get element of initializer?"); + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+Twine(i), + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + uint64_t FieldOffset = Layout.getElementOffset(i); + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); + if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) + NGV->setAlignment(NewAlign); + } + } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + unsigned NumElements = 0; + if (ArrayType *ATy = dyn_cast<ArrayType>(STy)) + NumElements = ATy->getNumElements(); + else + NumElements = cast<VectorType>(STy)->getNumElements(); + + if (NumElements > 16 && GV->hasNUsesOrMore(16)) + return nullptr; // It's not worth it. + NewGlobals.reserve(NumElements); + + uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType()); + unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType()); + for (unsigned i = 0, e = NumElements; i != e; ++i) { + Constant *In = Init->getAggregateElement(i); + assert(In && "Couldn't get element of initializer?"); + + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+Twine(i), + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); + NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + if (NewAlign > EltAlign) + NGV->setAlignment(NewAlign); + } + } + + if (NewGlobals.empty()) + return nullptr; + + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); + + Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); + + // Loop over all of the uses of the global, replacing the constantexpr geps, + // with smaller constantexpr geps or direct references. + while (!GV->use_empty()) { + User *GEP = GV->user_back(); + assert(((isa<ConstantExpr>(GEP) && + cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| + isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); + + // Ignore the 1th operand, which has to be zero or else the program is quite + // broken (undefined). Get the 2nd operand, which is the structure or array + // index. + unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); + if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. + + Value *NewPtr = NewGlobals[Val]; + Type *NewTy = NewGlobals[Val]->getValueType(); + + // Form a shorter GEP if needed. 
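+    // (Sketch: 'gep @G, 0, Field, Rest...' becomes 'gep @G.Field, 0, Rest...'
+    //  on the split-out global for that element.)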
+ if (GEP->getNumOperands() > 3) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { + SmallVector<Constant*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) + Idxs.push_back(CE->getOperand(i)); + NewPtr = + ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); + } else { + GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); + SmallVector<Value*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) + Idxs.push_back(GEPI->getOperand(i)); + NewPtr = GetElementPtrInst::Create( + NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI); + } + } + GEP->replaceAllUsesWith(NewPtr); + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) + GEPI->eraseFromParent(); + else + cast<ConstantExpr>(GEP)->destroyConstant(); + } + + // Delete the old global, now that it is dead. + Globals.erase(GV); + ++NumSRA; + + // Loop over the new globals array deleting any globals that are obviously + // dead. This can arise due to scalarization of a structure or an array that + // has elements that are dead. + unsigned FirstGlobal = 0; + for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) + if (NewGlobals[i]->use_empty()) { + Globals.erase(NewGlobals[i]); + if (FirstGlobal == i) ++FirstGlobal; + } + + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; +} + +/// Return true if all users of the specified value will trap if the value is +/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid +/// reprocessing them. +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSetImpl<const PHINode*> &PHIs) { + for (const User *U : V->users()) + if (isa<LoadInst>(U)) { + // Will trap. + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Storing the value. + } + } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { + if (CI->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Not calling the ptr + } + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { + if (II->getCalledValue() != V) { + //cerr << "NONTRAPPING USE: " << *U; + return false; // Not calling the ptr + } + } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { + if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { + // If we've already seen this phi node, ignore it, it has already been + // checked. + if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + return false; + } else if (isa<ICmpInst>(U) && + isa<ConstantPointerNull>(U->getOperand(1))) { + // Ignore icmp X, null + } else { + //cerr << "NONTRAPPING USE: " << *U; + return false; + } + + return true; +} + +/// Return true if all uses of any loads from GV will trap if the loaded value +/// is null. Note that this also permits comparisons of the loaded value +/// against null, as a special case. +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (const User *U : GV->users()) + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + SmallPtrSet<const PHINode*, 8> PHIs; + if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) + return false; + } else if (isa<StoreInst>(U)) { + // Ignore stores to the global. 
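+      // (A store only writes a new value into GV; it is not a use of the
+      // loaded value, so it cannot affect the will-trap analysis.)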
+    } else {
+      // We don't know or understand this user, bail out.
+      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
+      return false;
+    }
+  return true;
+}
+
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
+  bool Changed = false;
+  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
+    Instruction *I = cast<Instruction>(*UI++);
+    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      LI->setOperand(0, NewV);
+      Changed = true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+      if (SI->getOperand(1) == V) {
+        SI->setOperand(1, NewV);
+        Changed = true;
+      }
+    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+      CallSite CS(I);
+      if (CS.getCalledValue() == V) {
+        // Calling through the pointer!  Turn into a direct call, but be
+        // careful that the pointer is not also being passed as an argument.
+        CS.setCalledFunction(NewV);
+        Changed = true;
+        bool PassedAsArg = false;
+        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+          if (CS.getArgument(i) == V) {
+            PassedAsArg = true;
+            CS.setArgument(i, NewV);
+          }
+
+        if (PassedAsArg) {
+          // Being passed as an argument also.  Be careful to not invalidate
+          // UI!
+          UI = V->user_begin();
+        }
+      }
+    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
+                                ConstantExpr::getCast(CI->getOpcode(),
+                                                      NewV, CI->getType()));
+      if (CI->use_empty()) {
+        Changed = true;
+        CI->eraseFromParent();
+      }
+    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+      // Handle a GEP of the pointer: if all of its indices are constant,
+      // rewrite it as the equivalent constantexpr GEP of NewV; otherwise
+      // leave it alone.
+      SmallVector<Constant*, 8> Idxs;
+      Idxs.reserve(GEPI->getNumOperands()-1);
+      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+           i != e; ++i)
+        if (Constant *C = dyn_cast<Constant>(*i))
+          Idxs.push_back(C);
+        else
+          break;
+      if (Idxs.size() == GEPI->getNumOperands()-1)
+        Changed |= OptimizeAwayTrappingUsesOfValue(
+            GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
+      if (GEPI->use_empty()) {
+        Changed = true;
+        GEPI->eraseFromParent();
+      }
+    }
+  }
+
+  return Changed;
+}
+
+
+/// The specified global has only one non-null value stored into it.  If there
+/// are uses of the loaded value that would trap if the loaded value is
+/// dynamically null, then we know that they cannot be reachable with a null
+/// value, so we can optimize away the load entirely.
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+                                            const DataLayout &DL,
+                                            TargetLibraryInfo *TLI) {
+  bool Changed = false;
+
+  // Keep track of whether we are able to remove all the uses of the global
+  // other than the store that defines it.
+  bool AllNonStoreUsesGone = true;
+
+  // Replace all uses of loads with uses of the stored value.
+  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end();
+       GUI != E;) {
+    User *GlobalUser = *GUI++;
+    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
+      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+      // If we were able to delete all uses of the load, it is dead; erase it.
+      if (LI->use_empty()) {
+        LI->eraseFromParent();
+        Changed = true;
+      } else {
+        AllNonStoreUsesGone = false;
+      }
+    } else if (isa<StoreInst>(GlobalUser)) {
+      // Ignore the store that stores "LV" to the global.
+      assert(GlobalUser->getOperand(1) == GV &&
+             "Must be storing *to* the global");
+    } else {
+      AllNonStoreUsesGone = false;
+
+      // If we get here we could have other crazy uses that are transitively
+      // loaded.
+      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+              isa<BitCastInst>(GlobalUser) ||
+              isa<GetElementPtrInst>(GlobalUser)) &&
+             "Only expect load and stores!");
+    }
+  }
+
+  if (Changed) {
+    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
+                 << "\n");
+    ++NumGlobUses;
+  }
+
+  // If we nuked all of the loads, then none of the stores are needed either,
+  // nor is the global.
+  if (AllNonStoreUsesGone) {
+    if (isLeakCheckerRoot(GV)) {
+      Changed |= CleanupPointerRootUsers(GV, TLI);
+    } else {
+      Changed = true;
+      CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
+    }
+    if (GV->use_empty()) {
+      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
+      Changed = true;
+      GV->eraseFromParent();
+      ++NumDeleted;
+    }
+  }
+  return Changed;
+}
+
+/// Walk the use list of V, constant folding all of the instructions that are
+/// foldable.
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
+                                TargetLibraryInfo *TLI) {
+  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
+    if (Instruction *I = dyn_cast<Instruction>(*UI++))
+      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
+        I->replaceAllUsesWith(NewC);
+
+        // Advance UI to the next non-I use to avoid invalidating it!
+        // Instructions could multiply use V.
+        while (UI != E && *UI == I)
+          ++UI;
+        I->eraseFromParent();
+      }
+}
+
+/// This function takes the specified global variable, and transforms the
+/// program as if it always contained the result of the specified malloc.
+/// Because it is always the result of the specified malloc, there is no reason
+/// to actually DO the malloc.  Instead, turn the malloc into a global, and
+/// rewrite any loads of GV as uses of the new global.
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+                              ConstantInt *NElements, const DataLayout &DL,
+                              TargetLibraryInfo *TLI) {
+  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
+
+  Type *GlobalType;
+  if (NElements->getZExtValue() == 1)
+    GlobalType = AllocTy;
+  else
+    // If we have an array allocation, the global variable is of an array.
+    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+
+  // Create the new global variable.  The contents of the malloc'd memory are
+  // undefined, so initialize with an undef value.
+  GlobalVariable *NewGV = new GlobalVariable(
+      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
+      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
+      GV->getThreadLocalMode());
+
+  // If there are bitcast users of the malloc (which is typical, usually we have
+  // a malloc + bitcast) then replace them with uses of the new global.  Update
+  // other users to use the global as well.
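+  //
+  // Sketch of the overall rewrite (hypothetical IR): given
+  //   %m = call i8* @malloc(i64 4)
+  //   %p = bitcast i8* %m to i32*
+  //   store i32* %p, i32** @G
+  // we create '@G.body = internal global i32 undef' and redirect the
+  // bitcast's users (and, below, all loads of @G) to @G.body instead.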
+ BitCastInst *TheBC = nullptr; + while (!CI->use_empty()) { + Instruction *User = cast<Instruction>(CI->user_back()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + if (BCI->getType() == NewGV->getType()) { + BCI->replaceAllUsesWith(NewGV); + BCI->eraseFromParent(); + } else { + BCI->setOperand(0, NewGV); + } + } else { + if (!TheBC) + TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); + User->replaceUsesOfWith(CI, TheBC); + } + } + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".init", GV->getThreadLocalMode()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + while (!GV->use_empty()) { + if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, + SI->getOrdering(), SI->getSynchScope(), SI); + SI->eraseFromParent(); + continue; + } + + LoadInst *LI = cast<LoadInst>(GV->user_back()); + while (!LI->use_empty()) { + Use &LoadUse = *LI->use_begin(); + ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); + if (!ICI) { + LoadUse = RepValue; + continue; + } + + // Replace the cmp X, 0 with a use of the bool value. + // Sink the load to where the compare was, if atomic rules allow us to. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, + LI->getOrdering(), LI->getSynchScope(), + LI->isUnordered() ? (Instruction*)ICI : LI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + LI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast<StoreInst>(InitBool->user_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); + + // Now the GV is dead, nuke it and the malloc.. + GV->eraseFromParent(); + CI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV, DL, TLI); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue, DL, TLI); + + return NewGV; +} + +/// Scan the use-list of V checking to make sure that there are no complex uses +/// of V. 
We permit simple things like dereferencing the pointer, but not +/// storing through the address, unless it is to the specified global. +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSetImpl<const PHINode*> &PHIs) { + for (const User *U : V->users()) { + const Instruction *Inst = cast<Instruction>(U); + + if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { + continue; // Fine, ignore. + } + + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->getOperand(0) == V && SI->getOperand(1) != GV) + return false; // Storing the pointer itself... bad. + continue; // Otherwise, storing through it, or storing into GV... fine. + } + + // Must index into the array and into the struct. + if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) + return false; + continue; + } + + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { + // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI + // cycles. + if (PHIs.insert(PN).second) + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) + return false; + continue; + } + + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + return false; + continue; + } + + return false; + } + return true; +} + +/// The Alloc pointer is stored into GV somewhere. Transform all uses of the +/// allocation into loads from the global and uses of the resultant pointer. +/// Further, delete the store into GV. This assumes that these value pass the +/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, + GlobalVariable *GV) { + while (!Alloc->use_empty()) { + Instruction *U = cast<Instruction>(*Alloc->user_begin()); + Instruction *InsertPt = U; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // If this is the store of the allocation into the global, remove it. + if (SI->getOperand(1) == GV) { + SI->eraseFromParent(); + continue; + } + } else if (PHINode *PN = dyn_cast<PHINode>(U)) { + // Insert the load in the corresponding predecessor, not right before the + // PHI. + InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator(); + } else if (isa<BitCastInst>(U)) { + // Must be bitcast between the malloc and store to initialize the global. + ReplaceUsesOfMallocWithGlobal(U, GV); + U->eraseFromParent(); + continue; + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + // If this is a "GEP bitcast" and the user is a store to the global, then + // just process it as a bitcast. + if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) + if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back())) + if (SI->getOperand(1) == GV) { + // Must be bitcast GEP between the malloc and store to initialize + // the global. + ReplaceUsesOfMallocWithGlobal(GEPI, GV); + GEPI->eraseFromParent(); + continue; + } + } + + // Insert a load from the global, and use it instead of the malloc. + Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); + U->replaceUsesOfWith(Alloc, NL); + } +} + +/// Verify that all uses of V (a load, or a phi of a load) are simple enough to +/// perform heap SRA on. This permits GEP's that index through the array and +/// struct field, icmps of null, and PHIs. 
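+/// For example (illustrative): after loading a struct pointer %s from the
+/// global, uses such as 'icmp eq %s, null' or 'gep %s, %i, 1' are simple
+/// enough, while passing %s to a call would defeat the transformation.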
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
+                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
+  // We permit two users of the load: setcc comparing against the null
+  // pointer, and a getelementptr of a specific form.
+  for (const User *U : V->users()) {
+    const Instruction *UI = cast<Instruction>(U);
+
+    // Comparison against null is ok.
+    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return false;
+      continue;
+    }
+
+    // getelementptr is also ok, but only a simple form.
+    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
+      // Must index into the array and into the struct.
+      if (GEPI->getNumOperands() < 3)
+        return false;
+
+      // Otherwise the GEP is ok.
+      continue;
+    }
+
+    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
+      if (!LoadUsingPHIsPerLoad.insert(PN).second)
+        // This means some phi nodes are dependent on each other.
+        // Avoid infinite looping!
+        return false;
+      if (!LoadUsingPHIs.insert(PN).second)
+        // If we have already analyzed this PHI, then it is safe.
+        continue;
+
+      // Make sure all uses of the PHI are simple enough to transform.
+      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
+                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
+        return false;
+
+      continue;
+    }
+
+    // Otherwise we don't know what this is, not ok.
+    return false;
+  }
+
+  return true;
+}
+
+
+/// If all users of values loaded from GV are simple enough to perform HeapSRA,
+/// return true.
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
+                                                    Instruction *StoredVal) {
+  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+  for (const User *U : GV->users())
+    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
+                                          LoadUsingPHIsPerLoad))
+        return false;
+      LoadUsingPHIsPerLoad.clear();
+    }
+
+  // If we reach here, we know that all uses of the loads and transitive uses
+  // (through PHI nodes) are simple enough to transform.  However, we don't
+  // know that all inputs to the PHI nodes are in the same equivalence sets.
+  // Check to verify that all operands of the PHIs are either PHIs that can be
+  // transformed, loads from GV, or the stored value (StoredVal) itself.
+  for (const PHINode *PN : LoadUsingPHIs) {
+    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+      Value *InVal = PN->getIncomingValue(op);
+
+      // PHI of the stored value itself is ok.
+      if (InVal == StoredVal) continue;
+
+      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+        // One of the PHIs in our set is (optimistically) ok.
+        if (LoadUsingPHIs.count(InPN))
+          continue;
+        return false;
+      }
+
+      // Load from GV is ok.
+      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
+        if (LI->getOperand(0) == GV)
+          continue;
+
+      // TODO: undef and null incoming values could also be handled here.
+
+      // Anything else is rejected.
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+  if (FieldNo >= FieldVals.size())
+    FieldVals.resize(FieldNo+1);
+
+  // If we already have this value, just reuse the previously scalarized
+  // version.
+  if (Value *FieldVal = FieldVals[FieldNo])
+    return FieldVal;
+
+  // Depending on what instruction this is, we have several cases.
+ Value *Result; + if (LoadInst *LI = dyn_cast<LoadInst>(V)) { + // This is a scalarized version of the load from the global. Just create + // a new Load of the scalarized global. + Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, + InsertedScalarizedValues, + PHIsToRewrite), + LI->getName()+".f"+Twine(FieldNo), LI); + } else { + PHINode *PN = cast<PHINode>(V); + // PN's type is pointer to struct. Make a new PHI of pointer to struct + // field. + + PointerType *PTy = cast<PointerType>(PN->getType()); + StructType *ST = cast<StructType>(PTy->getElementType()); + + unsigned AS = PTy->getAddressSpace(); + PHINode *NewPN = + PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), + PN->getNumIncomingValues(), + PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; + PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); + } + + return FieldVals[FieldNo] = Result; +} + +/// Given a load instruction and a value derived from the load, rewrite the +/// derived value to use the HeapSRoA'd load. +static void RewriteHeapSROALoadUser(Instruction *LoadUser, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + // If this is a comparison against null, handle it. + if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { + assert(isa<ConstantPointerNull>(SCI->getOperand(1))); + // If we have a setcc of the loaded pointer, we can use a setcc of any + // field. + Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, + InsertedScalarizedValues, PHIsToRewrite); + + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName()); + SCI->replaceAllUsesWith(New); + SCI->eraseFromParent(); + return; + } + + // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { + assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) + && "Unexpected GEPI!"); + + // Load the pointer for this field. + unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); + Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, + InsertedScalarizedValues, PHIsToRewrite); + + // Create the new GEP idx vector. + SmallVector<Value*, 8> GEPIdx; + GEPIdx.push_back(GEPI->getOperand(1)); + GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); + + Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NGEPI); + GEPI->eraseFromParent(); + return; + } + + // Recursively transform the users of PHI nodes. This will lazily create the + // PHIs that are needed for individual elements. Keep track of what PHIs we + // see in InsertedScalarizedValues so that we don't get infinite loops (very + // antisocial). If the PHI is already in InsertedScalarizedValues, it has + // already been seen first by another load, so its uses have already been + // processed. + PHINode *PN = cast<PHINode>(LoadUser); + if (!InsertedScalarizedValues.insert(std::make_pair(PN, + std::vector<Value*>())).second) + return; + + // If this is the first time we've seen this PHI, recursively process all + // users. + for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } +} + +/// We are performing Heap SRoA on a global. Ptr is a value loaded from the +/// global. 
Eliminate all uses of Ptr, making them use FieldGlobals instead. +/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, + DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { + for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + } + + if (Load->use_empty()) { + Load->eraseFromParent(); + InsertedScalarizedValues.erase(Load); + } +} + +/// CI is an allocation of an array of structures. Break it up into multiple +/// allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, + Value *NElems, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); + Type *MAT = getMallocAllocatedType(CI, TLI); + StructType *STy = cast<StructType>(MAT); + + // There is guaranteed to be at least one use of the malloc (storing + // it into GV). If there are other uses, change them to be uses of + // the global to simplify later code. This also deletes the store + // into GV. + ReplaceUsesOfMallocWithGlobal(CI, GV); + + // Okay, at this point, there are no users of the malloc. Insert N + // new mallocs at the same place as CI, and N globals. + std::vector<Value*> FieldGlobals; + std::vector<Value*> FieldMallocs; + + unsigned AS = GV->getType()->getPointerAddressSpace(); + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ + Type *FieldTy = STy->getElementType(FieldNo); + PointerType *PFieldTy = PointerType::get(FieldTy, AS); + + GlobalVariable *NGV = new GlobalVariable( + *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), + nullptr, GV->getThreadLocalMode()); + FieldGlobals.push_back(NGV); + + unsigned TypeSize = DL.getTypeAllocSize(FieldTy); + if (StructType *ST = dyn_cast<StructType>(FieldTy)) + TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); + Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, + ConstantInt::get(IntPtrTy, TypeSize), + NElems, nullptr, + CI->getName() + ".f" + Twine(FieldNo)); + FieldMallocs.push_back(NMI); + new StoreInst(NMI, NGV, CI); + } + + // The tricky aspect of this transformation is handling the case when malloc + // fails. In the original code, malloc failing would set the result pointer + // of malloc to null. In this case, some mallocs could succeed and others + // could fail. As such, we emit code that looks like this: + // F0 = malloc(field0) + // F1 = malloc(field1) + // F2 = malloc(field2) + // if (F0 == 0 || F1 == 0 || F2 == 0) { + // if (F0) { free(F0); F0 = 0; } + // if (F1) { free(F1); F1 = 0; } + // if (F2) { free(F2); F2 = 0; } + // } + // The malloc can also fail if its argument is too large. 
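+  // (The 'isneg' compare created below folds that case in: a signed-negative
+  // element count is treated up front as a failed allocation.)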
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), + ConstantZero, "isneg"); + for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { + Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); + } + + // Split the basic block at the old malloc. + BasicBlock *OrigBB = CI->getParent(); + BasicBlock *ContBB = + OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); + + // Create the block to check the first condition. Put all these blocks at the + // end of the function as they are unlikely to be executed. + BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), + "malloc_ret_null", + OrigBB->getParent()); + + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond + // branch on RunningOr. + OrigBB->getTerminator()->eraseFromParent(); + BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); + + // Within the NullPtrBlock, we need to emit a comparison and branch for each + // pointer, because some may be null while others are not. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType())); + BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", + OrigBB->getParent()); + Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, + Cmp, NullPtrBlock); + + // Fill in FreeBlock. + CallInst::CreateFree(GVVal, BI); + new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], + FreeBlock); + BranchInst::Create(NextBlock, FreeBlock); + + NullPtrBlock = NextBlock; + } + + BranchInst::Create(ContBB, NullPtrBlock); + + // CI is no longer needed, remove it. + CI->eraseFromParent(); + + /// As we process loads, if we can't immediately update all uses of the load, + /// keep track of what scalarized loads are inserted for a given load. + DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; + InsertedScalarizedValues[GV] = FieldGlobals; + + std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; + + // Okay, the malloc site is completely handled. All of the uses of GV are now + // loads, and all uses of those loads are simple. Rewrite them to use loads + // of the per-field globals instead. + for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); + continue; + } + + // Must be a store of null. + StoreInst *SI = cast<StoreInst>(User); + assert(isa<ConstantPointerNull>(SI->getOperand(0)) && + "Unexpected heap-sra user!"); + + // Insert a store of null into each global. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); + Constant *Null = Constant::getNullValue(PT->getElementType()); + new StoreInst(Null, FieldGlobals[i], SI); + } + // Erase the original store. + SI->eraseFromParent(); + } + + // While we have PHIs that are interesting to rewrite, do it. 
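+  // (This is a worklist loop: GetHeapSROAValue may create new field PHIs and
+  // push them onto PHIsToRewrite while existing entries are processed.)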
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&
+           "Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+    }
+  }
+
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return cast<GlobalVariable>(FieldGlobals[0]);
+}
+
+/// This function is called when we see a pointer global variable with a single
+/// value stored into it that is a malloc or cast of malloc.
+static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
+                                               Type *AllocTy,
+                                               AtomicOrdering Ordering,
+                                               const DataLayout &DL,
+                                               TargetLibraryInfo *TLI) {
+  // If this is a malloc of an abstract type, don't touch it.
+  if (!AllocTy->isSized())
+    return false;
+
+  // We can't optimize this global unless all uses of it are *known* to be
+  // of the malloc value, not of the null initializer value (consider a use
+  // that compares the global's value against zero to see if the malloc has
+  // been reached).  To do this, we check to see if all uses of the global
+  // would trap if the global were null: this proves that they must all
+  // happen after the malloc.
+  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+    return false;
+
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals.  This allows the
+  // malloc to be stored into the specified global, loaded, icmp'd, and
+  // GEP'd.  All of those uses can be rewritten to go through the global
+  // instead.
+  SmallPtrSet<const PHINode*, 8> PHIs;
+  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+    return false;
+
+  // If we have a global that is only initialized with a fixed size malloc,
+  // transform the program to use global memory instead of malloc'd memory.
+  // This eliminates dynamic allocation, avoids an indirection accessing the
+  // data, and exposes the resultant global to further GlobalOpt.
+  // We cannot optimize the malloc if we cannot determine the malloc array
+  // size.
+  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
+  if (!NElems)
+    return false;
+
+  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
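+    // For example (sketch): 'malloc(100 * sizeof(int))' stored once into an
+    // i32* global becomes '@G.body = internal global [100 x i32] undef', plus
+    // an i1 '@G.init' flag only if the pointer is ever compared against null.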
+    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
+      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
+      return true;
+    }
+
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+
+  if (Ordering != NotAtomic)
+    return false;
+
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();
+
+  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+  if (!AllocSTy)
+    return false;
+
+  // If the structure has an unreasonable number of fields, leave it
+  // alone.
+  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+    // If this is a fixed size array, transform the Malloc to be an alloc of
+    // structs.  malloc [100 x struct],1 -> malloc struct, 100
+    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
+      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
+      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+                                                   AllocSize, NumElements,
+                                                   nullptr, CI->getName());
+      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+      CI->replaceAllUsesWith(Cast);
+      CI->eraseFromParent();
+      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
+        CI = cast<CallInst>(BCI->getOperand(0));
+      else
+        CI = cast<CallInst>(Malloc);
+    }
+
+    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
+                         TLI);
+    return true;
+  }
+
+  return false;
+}
+
+// Try to optimize globals based on the knowledge that only one value (besides
+// its initializer) is ever stored to the global.
+static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+                                     AtomicOrdering Ordering,
+                                     const DataLayout &DL,
+                                     TargetLibraryInfo *TLI) {
+  // Ignore no-op GEPs and bitcasts.
+  StoredOnceVal = StoredOnceVal->stripPointerCasts();
+
+  // If we are dealing with a pointer global that is initialized to null and
+  // only has one (non-null) value stored into it, then we can optimize any
+  // users of the loaded value (often calls and loads) that would trap if the
+  // value was null.
+  if (GV->getInitializer()->getType()->isPointerTy() &&
+      GV->getInitializer()->isNullValue()) {
+    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
+      if (GV->getInitializer()->getType() != SOVC->getType())
+        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
+
+      // Optimize away any trapping uses of the loaded value.
+      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
+        return true;
+    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+      Type *MallocType = getMallocAllocatedType(CI, TLI);
+      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+                                                           Ordering, DL, TLI))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// At this point, we have learned that the only two values ever stored into GV
+/// are its initializer and OtherVal.  See if we can shrink the global into a
+/// boolean and select between the two values whenever it is used.
This exposes +/// the values to other scalar optimizations. +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { + Type *GVElType = GV->getType()->getElementType(); + + // If GVElType is already i1, it is already shrunk. If the type of the GV is + // an FP value, pointer or vector, don't do this optimization because a select + // between them is very expensive and unlikely to lead to later + // simplification. In these cases, we typically end up with "cond ? v1 : v2" + // where v1 and v2 both require constant pool loads, a big loss. + if (GVElType == Type::getInt1Ty(GV->getContext()) || + GVElType->isFloatingPointTy() || + GVElType->isPointerTy() || GVElType->isVectorTy()) + return false; + + // Walk the use list of the global seeing if all the uses are load or store. + // If there is anything else, bail out. + for (User *U : GV->users()) + if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) + return false; + + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); + + // Create the new global, initializing it to false. + GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), + false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".b", + GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); + + Constant *InitVal = GV->getInitializer(); + assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && + "No reason to shrink to bool!"); + + // If initialized to zero and storing one into the global, we can use a cast + // instead of a select to synthesize the desired value. + bool IsOneZero = false; + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) + IsOneZero = InitVal->isNullValue() && CI->isOne(); + + while (!GV->use_empty()) { + Instruction *UI = cast<Instruction>(GV->user_back()); + if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { + // Change the store into a boolean store. + bool StoringOther = SI->getOperand(0) == OtherVal; + // Only do this if we weren't storing a loaded value. + Value *StoreVal; + if (StoringOther || SI->getOperand(0) == InitVal) { + StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), + StoringOther); + } else { + // Otherwise, we are storing a previously loaded copy. To do this, + // change the copy from copying the original value to just copying the + // bool. + Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); + + // If we've already replaced the input, StoredVal will be a cast or + // select instruction. If not, it will be a load of the original + // global. + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + assert(LI->getOperand(0) == GV && "Not a copy!"); + // Insert a new load, to preserve the saved value. + StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0, + LI->getOrdering(), LI->getSynchScope(), LI); + } else { + assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && + "This is not a form that we understand!"); + StoreVal = StoredVal->getOperand(0); + assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); + } + } + new StoreInst(StoreVal, NewGV, false, 0, + SI->getOrdering(), SI->getSynchScope(), SI); + } else { + // Change the load into a load of bool then a select. 
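+      // (Sketch: '%v = load @G' becomes '%b = load @G.b' followed by
+      //  '%v = select i1 %b, OtherVal, InitVal', or a zext of %b when the
+      //  two stored values are exactly 0 and 1.)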
+ LoadInst *LI = cast<LoadInst>(UI); + LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0, + LI->getOrdering(), LI->getSynchScope(), LI); + Value *NSI; + if (IsOneZero) + NSI = new ZExtInst(NLI, LI->getType(), "", LI); + else + NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); + NSI->takeName(LI); + LI->replaceAllUsesWith(NSI); + } + UI->eraseFromParent(); + } + + // Retain the name of the old global variable. People who are debugging their + // programs may expect these variables to be named the same. + NewGV->takeName(GV); + GV->eraseFromParent(); + return true; +} + +bool GlobalOpt::deleteIfDead(GlobalValue &GV) { + GV.removeDeadConstantUsers(); + + if (!GV.isDiscardableIfUnused()) + return false; + + if (const Comdat *C = GV.getComdat()) + if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) + return false; + + bool Dead; + if (auto *F = dyn_cast<Function>(&GV)) + Dead = F->isDefTriviallyDead(); + else + Dead = GV.use_empty(); + if (!Dead) + return false; + + DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + GV.eraseFromParent(); + ++NumDeleted; + return true; +} + +/// Analyze the specified global variable and optimize it if possible. If we +/// make a change, return true. +bool GlobalOpt::processGlobal(GlobalValue &GV) { + // Do more involved optimizations if the global is internal. + if (!GV.hasLocalLinkage()) + return false; + + GlobalStatus GS; + + if (GlobalStatus::analyzeGlobal(&GV, GS)) + return false; + + bool Changed = false; + if (!GS.IsCompared && !GV.hasUnnamedAddr()) { + GV.setUnnamedAddr(true); + NumUnnamed++; + Changed = true; + } + + auto *GVar = dyn_cast<GlobalVariable>(&GV); + if (!GVar) + return Changed; + + if (GVar->isConstant() || !GVar->hasInitializer()) + return Changed; + + return processInternalGlobal(GVar, GS) || Changed; +} + +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { + // Find all uses of GV. We expect them all to be in F, and if we can't + // identify any of the uses we bail out. + // + // On each of these uses, identify if the memory that GV points to is + // used/required/live at the start of the function. If it is not, for example + // if the first thing the function does is store to the GV, the GV can + // possibly be demoted. + // + // We don't do an exhaustive search for memory operations - simply look + // through bitcasts as they're quite common and benign. + const DataLayout &DL = GV->getParent()->getDataLayout(); + SmallVector<LoadInst *, 4> Loads; + SmallVector<StoreInst *, 4> Stores; + for (auto *U : GV->users()) { + if (Operator::getOpcode(U) == Instruction::BitCast) { + for (auto *UU : U->users()) { + if (auto *LI = dyn_cast<LoadInst>(UU)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast<StoreInst>(UU)) + Stores.push_back(SI); + else + return false; + } + continue; + } + + Instruction *I = dyn_cast<Instruction>(U); + if (!I) + return false; + assert(I->getParent()->getParent() == F); + + if (auto *LI = dyn_cast<LoadInst>(I)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast<StoreInst>(I)) + Stores.push_back(SI); + else + return false; + } + + // We have identified all uses of GV into loads and stores. Now check if all + // of them are known not to depend on the value of the global at the function + // entry point. We do this by ensuring that every load is dominated by at + // least one store. + auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F)) + .getDomTree(); + + // The below check is quadratic. 
Check we're not going to do too many tests. + // FIXME: Even though this will always have worst-case quadratic time, we + // could put effort into minimizing the average time by putting stores that + // have been shown to dominate at least one load at the beginning of the + // Stores array, making subsequent dominance checks more likely to succeed + // early. + // + // The threshold here is fairly large because global->local demotion is a + // very powerful optimization should it fire. + const unsigned Threshold = 100; + if (Loads.size() * Stores.size() > Threshold) + return false; + + for (auto *L : Loads) { + auto *LTy = L->getType(); + if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) { + auto *STy = S->getValueOperand()->getType(); + // The load is only dominated by the store if DomTree says so + // and the number of bits loaded in L is less than or equal to + // the number of bits stored in S. + return DT.dominates(S, L) && + DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); + })) + return false; + } + // All loads have known dependences inside F, so the global can be localized. + return true; +} + +/// C may have non-instruction users. Can all of those users be turned into +/// instructions? +static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { + // We don't do this exhaustively. The most common pattern that we really need + // to care about is a constant GEP or constant bitcast - so just looking + // through one single ConstantExpr. + // + // The set of constants that this function returns true for must be able to be + // handled by makeAllConstantUsesInstructions. + for (auto *U : C->users()) { + if (isa<Instruction>(U)) + continue; + if (!isa<ConstantExpr>(U)) + // Non instruction, non-constantexpr user; cannot convert this. + return false; + for (auto *UU : U->users()) + if (!isa<Instruction>(UU)) + // A constantexpr used by another constant. We don't try and recurse any + // further but just bail out at this point. + return false; + } + + return true; +} + +/// C may have non-instruction users, and +/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the +/// non-instruction users to instructions. +static void makeAllConstantUsesInstructions(Constant *C) { + SmallVector<ConstantExpr*,4> Users; + for (auto *U : C->users()) { + if (isa<ConstantExpr>(U)) + Users.push_back(cast<ConstantExpr>(U)); + else + // We should never get here; allNonInstructionUsersCanBeMadeInstructions + // should not have returned true for C. + assert( + isa<Instruction>(U) && + "Can't transform non-constantexpr non-instruction to instruction!"); + } + + SmallVector<Value*,4> UUsers; + for (auto *U : Users) { + UUsers.clear(); + for (auto *UU : U->users()) + UUsers.push_back(UU); + for (auto *UU : UUsers) { + Instruction *UI = cast<Instruction>(UU); + Instruction *NewU = U->getAsInstruction(); + NewU->insertBefore(UI); + UI->replaceUsesOfWith(U, NewU); + } + U->dropAllReferences(); + } +} + +/// Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. +bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, + const GlobalStatus &GS) { + auto &DL = GV->getParent()->getDataLayout(); + // If this is a first class global and has only one accessing function and + // this function is non-recursive, we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non-single-value types since we + // are just replacing static memory to stack memory. 
+ // + // If the global is in different address space, don't bring it to stack. + if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && + GV->getType()->getElementType()->isSingleValueType() && + GV->getType()->getAddressSpace() == 0 && + !GV->isExternallyInitialized() && + allNonInstructionUsersCanBeMadeInstructions(GV) && + GS.AccessingFunction->doesNotRecurse() && + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); + Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); + Type *ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, + GV->getName(), &FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + makeAllConstantUsesInstructions(GV); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.IsLoaded) { + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); + + bool Changed; + if (isLeakCheckerRoot(GV)) { + // Delete any constant stores to the global. + Changed = CleanupPointerRootUsers(GV, TLI); + } else { + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + } + + // If the global is dead now, delete it. + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + Changed = true; + } + return Changed; + + } else if (GS.StoredType <= GlobalStatus::InitializerStored) { + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); + GV->setConstant(true); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (SRAGlobal(GV, DL)) + return true; + } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be the stored value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + + if (GV->use_empty()) { + DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); + GV->eraseFromParent(); + ++NumDeleted; + } + ++NumSubstitute; + return true; + } + + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. 
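+    // As one illustrative case: if the only value ever stored to the global
+    // is the address of another global, loads of it can never yield null, so
+    // null checks guarding those loaded pointers can be folded away (a
+    // sketch of what the helper below may do, not an exhaustive list).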
+ if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) + return true; + + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) { + if (GS.Ordering == NotAtomic) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; + return true; + } + } + } + } + + return false; +} + +/// Walk all of the direct calls of the specified function, changing them to +/// FastCC. +static void ChangeCalleesToFastCall(Function *F) { + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallSite CS(cast<Instruction>(U)); + CS.setCallingConv(CallingConv::Fast); + } +} + +static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { + for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { + unsigned Index = Attrs.getSlotIndex(i); + if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest)) + continue; + + // There can be only one. + return Attrs.removeAttribute(C, Index, Attribute::Nest); + } + + return Attrs; +} + +static void RemoveNestAttribute(Function *F) { + F->setAttributes(StripNest(F->getContext(), F->getAttributes())); + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallSite CS(cast<Instruction>(U)); + CS.setAttributes(StripNest(F->getContext(), CS.getAttributes())); + } +} + +/// Return true if this is a calling convention that we'd like to change. The +/// idea here is that we don't want to mess with the convention if the user +/// explicitly requested something with performance implications like coldcc, +/// GHC, or anyregcc. +static bool isProfitableToMakeFastCC(Function *F) { + CallingConv::ID CC = F->getCallingConv(); + // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? + return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; +} + +bool GlobalOpt::OptimizeFunctions(Module &M) { + bool Changed = false; + // Optimize functions. + for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { + Function *F = &*FI++; + // Functions without names cannot be referenced outside this module. + if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) + F->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*F)) { + Changed = true; + continue; + } + + Changed |= processGlobal(*F); + + if (!F->hasLocalLinkage()) + continue; + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && + !F->hasAddressTaken()) { + // If this function has a calling convention worth changing, is not a + // varargs function, and is only called directly, promote it to use the + // Fast calling convention. + F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + + if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && + !F->hasAddressTaken()) { + // The function is not used by a trampoline intrinsic, so it is safe + // to remove the 'nest' attribute. + RemoveNestAttribute(F); + ++NumNestRemoved; + Changed = true; + } + } + return Changed; +} + +bool GlobalOpt::OptimizeGlobalVars(Module &M) { + bool Changed = false; + + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = &*GVI++; + // Global variables without names cannot be referenced outside this module. + if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) + GV->setLinkage(GlobalValue::InternalLinkage); + // Simplify the initializer. 
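+    // For example (illustrative), a constant-expression initializer such as
+    //   i32 add (i32 2, i32 3)
+    // folds down to the plain constant i32 5.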
+ if (GV->hasInitializer()) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { + auto &DL = M.getDataLayout(); + Constant *New = ConstantFoldConstantExpression(CE, DL, TLI); + if (New && New != CE) + GV->setInitializer(New); + } + + if (deleteIfDead(*GV)) { + Changed = true; + continue; + } + + Changed |= processGlobal(*GV); + } + return Changed; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); + +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: +/// void *X = &X/42; +/// because the code generator doesn't have a relocation that can handle that. +/// +/// This function should be called if C was not found (but just got inserted) +/// in SimpleConstants to avoid having to rescan the same constants all the +/// time. +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) + return true; + + // Aggregate values are safe if all their elements are. + if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || + isa<ConstantVector>(C)) { + for (Value *Op : C->operands()) + if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) + return false; + return true; + } + + // We don't know exactly what relocations are allowed in constant expressions, + // so we allow &global+constantoffset, which is safe and uniformly supported + // across targets. + ConstantExpr *CE = cast<ConstantExpr>(C); + switch (CE->getOpcode()) { + case Instruction::BitCast: + // Bitcast is fine if the casted value is fine. + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::IntToPtr: + case Instruction::PtrToInt: + // int <=> ptr is fine if the int type is the same size as the + // pointer type. + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + // GEP is fine if it is simple + constant offset. + case Instruction::GetElementPtr: + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + + case Instruction::Add: + // We allow simple+cst. + if (!isa<ConstantInt>(CE->getOperand(1))) + return false; + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); + } + return false; +} + +static inline bool +isSimpleEnoughValueToCommit(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { + // If we already checked this constant, we win. + if (!SimpleConstants.insert(C).second) + return true; + // Check the constant. + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); +} + + +/// Return true if this constant is simple enough for us to understand. In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. 
We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + // Do not allow weak/*_odr/linkonce linkage or external globals. + return GV->hasUniqueInitializer(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0)) && + cast<GEPOperator>(CE)->isInBounds()) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasUniqueInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + + // A constantexpr bitcast from a pointer to another pointer is a no-op, + // and we know how to evaluate it by moving the bitcast from the pointer + // operand to the value operand. + } else if (CE->getOpcode() == Instruction::BitCast && + isa<GlobalVariable>(CE->getOperand(0))) { + // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or + // external globals. + return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); + } + } + + return false; +} + +/// Evaluate a piece of a constantexpr store into a global initializer. This +/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the +/// GEP operands of Addr [0, OpNo) have been stepped into. +static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, + ConstantExpr *Addr, unsigned OpNo) { + // Base case of the recursion. + if (OpNo == Addr->getNumOperands()) { + assert(Val->getType() == Init->getType() && "Type mismatch!"); + return Val; + } + + SmallVector<Constant*, 32> Elts; + if (StructType *STy = dyn_cast<StructType>(Init->getType())) { + // Break up the constant into its elements. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + + // Replace the element that we are supposed to. + ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); + unsigned Idx = CU->getZExtValue(); + assert(Idx < STy->getNumElements() && "Struct index out of range!"); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + + // Return the modified struct. + return ConstantStruct::get(STy, Elts); + } + + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); + SequentialType *InitTy = cast<SequentialType>(Init->getType()); + + uint64_t NumElts; + if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = InitTy->getVectorNumElements(); + + // Break up the array into elements. 
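+  // Illustrative sketch: storing i32 5 at index 2 into an initializer of
+  //   [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+  // rebuilds it as
+  //   [4 x i32] [i32 0, i32 1, i32 5, i32 3]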
+ for (uint64_t i = 0, e = NumElts; i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + return ConstantVector::get(Elts); +} + +/// We have decided that Addr (which satisfies the predicate +/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. +static void CommitValueTo(Constant *Val, Constant *Addr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + assert(GV->hasInitializer()); + GV->setInitializer(Val); + return; + } + + ConstantExpr *CE = cast<ConstantExpr>(Addr); + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); +} + +namespace { + +/// This class evaluates LLVM IR, producing the Constant representing each SSA +/// instruction. Changes to global variables are stored in a mapping that can +/// be iterated over after the evaluation is complete. Once an evaluation call +/// fails, the evaluation object should not be reused. +class Evaluator { +public: + Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) + : DL(DL), TLI(TLI) { + ValueStack.emplace_back(); + } + + ~Evaluator() { + for (auto &Tmp : AllocaTmps) + // If there are still users of the alloca, the program is doing something + // silly, e.g. storing the address of the alloca somewhere and using it + // later. Since this is undefined, we'll just make it be null. + if (!Tmp->use_empty()) + Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); + } + + /// Evaluate a call to function F, returning true if successful, false if we + /// can't evaluate it. ActualArgs contains the formal arguments for the + /// function. + bool EvaluateFunction(Function *F, Constant *&RetVal, + const SmallVectorImpl<Constant*> &ActualArgs); + + /// Evaluate all instructions in block BB, returning true if successful, false + /// if we can't evaluate it. NewBB returns the next BB that control flows + /// into, or null upon return. + bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); + + Constant *getVal(Value *V) { + if (Constant *CV = dyn_cast<Constant>(V)) return CV; + Constant *R = ValueStack.back().lookup(V); + assert(R && "Reference to an uncomputed value!"); + return R; + } + + void setVal(Value *V, Constant *C) { + ValueStack.back()[V] = C; + } + + const DenseMap<Constant*, Constant*> &getMutatedMemory() const { + return MutatedMemory; + } + + const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const { + return Invariants; + } + +private: + Constant *ComputeLoadResult(Constant *P); + + /// As we compute SSA register values, we store their contents here. The back + /// of the deque contains the current function and the stack contains the + /// values in the calling frames. + std::deque<DenseMap<Value*, Constant*>> ValueStack; + + /// This is used to detect recursion. In pathological situations we could hit + /// exponential behavior, but at least there is nothing unbounded. + SmallVector<Function*, 4> CallStack; + + /// For each store we execute, we update this map. Loads check this to get + /// the most up-to-date value. If evaluation is successful, this state is + /// committed to the process. + DenseMap<Constant*, Constant*> MutatedMemory; + + /// To 'execute' an alloca, we create a temporary global variable to represent + /// its body. 
This vector is needed so we can delete the temporary globals + /// when we are done. + SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps; + + /// These global variables have been marked invariant by the static + /// constructor. + SmallPtrSet<GlobalVariable*, 8> Invariants; + + /// These are constants we have checked and know to be simple enough to live + /// in a static initializer of a global. + SmallPtrSet<Constant*, 8> SimpleConstants; + + const DataLayout &DL; + const TargetLibraryInfo *TLI; +}; + +} // anonymous namespace + +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. +Constant *Evaluator::ComputeLoadResult(Constant *P) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P); + if (I != MutatedMemory.end()) return I->second; + + // Access it. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { + if (GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + return nullptr; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (GV->hasDefinitiveInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return nullptr; // don't know how to evaluate. +} + +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. +bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, + BasicBlock *&NextBB) { + // This is the main evaluation loop. + while (1) { + Constant *InstResult = nullptr; + + DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (!SI->isSimple()) { + DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + Constant *Ptr = getVal(SI->getOperand(1)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + DEBUG(dbgs() << "; To: " << *Ptr << "\n"); + } + if (!isSimpleEnoughPointerToCommit(Ptr)) { + // If this is too complex for us to commit, reject it. + DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); + return false; + } + + Constant *Val = getVal(SI->getOperand(0)); + + // If this might be too difficult for the backend to handle (e.g. the addr + // of one global variable divided by another) then we can't commit it. + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { + DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val + << "\n"); + return false; + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + if (CE->getOpcode() == Instruction::BitCast) { + DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); + // If we're evaluating a store through a bitcast, then we need + // to pull the bitcast off the pointer type and push it onto the + // stored value. 
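+          // Illustrative sketch (constants hypothetical):
+          //   store i64 1, i64* bitcast (double* @g to i64*)
+          // is handled as if it were a store of
+          //   double bitcast (i64 1 to double)
+          // directly into @g.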
+ Ptr = CE->getOperand(0); + + Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); + + // In order to push the bitcast onto the stored value, a bitcast + // from NewTy to Val's type must be legal. If it's not, we can try + // introspecting NewTy to find a legal conversion. + while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { + // If NewTy is a struct, we can convert the pointer to the struct + // into a pointer to its first member. + // FIXME: This could be extended to support arrays as well. + if (StructType *STy = dyn_cast<StructType>(NewTy)) { + NewTy = STy->getTypeAtIndex(0U); + + IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); + Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); + Constant * const IdxList[] = {IdxZero, IdxZero}; + + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + + // If we can't improve the situation by introspecting NewTy, + // we have to give up. + } else { + DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " + "evaluate.\n"); + return false; + } + } + + // If we found compatible types, go ahead and push the bitcast + // onto the stored value. + Val = ConstantExpr::getBitCast(Val, NewTy); + + DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); + } + } + + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(BO->getOperand(0)), + getVal(BO->getOperand(1))); + DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult + << "\n"); + } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(CI->getOperand(0)), + getVal(CI->getOperand(1))); + DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult + << "\n"); + } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(CI->getOperand(0)), + CI->getType()); + DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult + << "\n"); + } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { + InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), + getVal(SI->getOperand(1)), + getVal(SI->getOperand(2))); + DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult + << "\n"); + } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { + InstResult = ConstantExpr::getExtractValue( + getVal(EVI->getAggregateOperand()), EVI->getIndices()); + DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { + InstResult = ConstantExpr::getInsertValue( + getVal(IVI->getAggregateOperand()), + getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); + DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult + << "\n"); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { + Constant *P = getVal(GEP->getOperand(0)); + SmallVector<Constant*, 8> GEPOps; + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); + i != e; ++i) + GEPOps.push_back(getVal(*i)); + InstResult = + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); + DEBUG(dbgs() << "Found a GEP! 
Simplifying: " << *InstResult + << "\n"); + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + + if (!LI->isSimple()) { + DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); + return false; // no volatile/atomic accesses. + } + + Constant *Ptr = getVal(LI->getOperand(0)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); + DEBUG(dbgs() << "Found a constant pointer expression, constant " + "folding: " << *Ptr << "\n"); + } + InstResult = ComputeLoadResult(Ptr); + if (!InstResult) { + DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." + "\n"); + return false; // Could not evaluate load. + } + + DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) { + DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); + return false; // Cannot handle array allocs. + } + Type *Ty = AI->getType()->getElementType(); + AllocaTmps.push_back( + make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, + UndefValue::get(Ty), AI->getName())); + InstResult = AllocaTmps.back().get(); + DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); + } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { + CallSite CS(&*CurInst); + + // Debug info can safely be ignored here. + if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { + DEBUG(dbgs() << "Ignoring debug info.\n"); + ++CurInst; + continue; + } + + // Cannot handle inline asm. + if (isa<InlineAsm>(CS.getCalledValue())) { + DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); + return false; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { + if (MSI->isVolatile()) { + DEBUG(dbgs() << "Can not optimize a volatile memset " << + "intrinsic.\n"); + return false; + } + Constant *Ptr = getVal(MSI->getDest()); + Constant *Val = getVal(MSI->getValue()); + Constant *DestVal = ComputeLoadResult(getVal(Ptr)); + if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { + // This memset is a no-op. + DEBUG(dbgs() << "Ignoring no-op memset.\n"); + ++CurInst; + continue; + } + } + + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); + ++CurInst; + continue; + } + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + // We don't insert an entry into Values, as it doesn't have a + // meaningful return value. + if (!II->use_empty()) { + DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); + return false; + } + ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); + Value *PtrArg = getVal(II->getArgOperand(1)); + Value *Ptr = PtrArg->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { + Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); + if (!Size->isAllOnesValue() && + Size->getValue().getLimitedValue() >= + DL.getTypeStoreSize(ElemTy)) { + Invariants.insert(GV); + DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV + << "\n"); + } else { + DEBUG(dbgs() << "Found a global var, but can not treat it as an " + "invariant.\n"); + } + } + // Continue even if we do nothing. 
+ ++CurInst; + continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; + } + + DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); + return false; + } + + // Resolve function pointers. + Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); + if (!Callee || Callee->mayBeOverridden()) { + DEBUG(dbgs() << "Can not resolve function pointer.\n"); + return false; // Cannot resolve. + } + + SmallVector<Constant*, 8> Formals; + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) + Formals.push_back(getVal(*i)); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + InstResult = C; + DEBUG(dbgs() << "Constant folded function call. Result: " << + *InstResult << "\n"); + } else { + DEBUG(dbgs() << "Can not constant fold function call.\n"); + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) { + DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); + return false; + } + + Constant *RetVal = nullptr; + // Execute the call, if successful, use the return value. + ValueStack.emplace_back(); + if (!EvaluateFunction(Callee, RetVal, Formals)) { + DEBUG(dbgs() << "Failed to evaluate function.\n"); + return false; + } + ValueStack.pop_back(); + InstResult = RetVal; + + if (InstResult) { + DEBUG(dbgs() << "Successfully evaluated function. Result: " << + InstResult << "\n\n"); + } else { + DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); + } + } + } else if (isa<TerminatorInst>(CurInst)) { + DEBUG(dbgs() << "Found a terminator instruction.\n"); + + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NextBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NextBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(SI->getCondition())); + if (!Val) return false; // Cannot determine. + NextBB = SI->findCaseValue(Val).getCaseSuccessor(); + } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { + Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); + if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) + NextBB = BA->getBasicBlock(); + else + return false; // Cannot determine. + } else if (isa<ReturnInst>(CurInst)) { + NextBB = nullptr; + } else { + // invoke, unwind, resume, unreachable. + DEBUG(dbgs() << "Can not handle terminator."); + return false; // Cannot handle this terminator. + } + + // We succeeded at evaluating this block! + DEBUG(dbgs() << "Successfully evaluated block.\n"); + return true; + } else { + // Did not know how to evaluate this! + DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." + "\n"); + return false; + } + + if (!CurInst->use_empty()) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) + InstResult = ConstantFoldConstantExpression(CE, DL, TLI); + + setVal(&*CurInst, InstResult); + } + + // If we just processed an invoke, we finished evaluating the block. + if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { + NextBB = II->getNormalDest(); + DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); + return true; + } + + // Advance program counter. 
+    ++CurInst;
+  }
+}
+
+/// Evaluate a call to function F, returning true if successful, false if we
+/// can't evaluate it. ActualArgs contains the formal arguments for the
+/// function.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+                                 const SmallVectorImpl<Constant*> &ActualArgs) {
+  // Check to see if this function is already executing (recursion). If so,
+  // bail out. TODO: we might want to accept limited recursion.
+  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+    return false;
+
+  CallStack.push_back(F);
+
+  // Initialize arguments to the incoming values specified.
+  unsigned ArgNo = 0;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+       ++AI, ++ArgNo)
+    setVal(&*AI, ActualArgs[ArgNo]);
+
+  // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+  // we can only evaluate any one basic block at most once. This set keeps
+  // track of what we have executed so we can detect recursive cases etc.
+  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+  // CurBB - The current basic block we're evaluating.
+  BasicBlock *CurBB = &F->front();
+
+  BasicBlock::iterator CurInst = CurBB->begin();
+
+  while (1) {
+    BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
+    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
+    if (!EvaluateBlock(CurInst, NextBB))
+      return false;
+
+    if (!NextBB) {
+      // Successfully running until there's no next block means that we found
+      // the return. Fill in the return value and pop the call stack.
+      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+      if (RI->getNumOperands())
+        RetVal = getVal(RI->getOperand(0));
+      CallStack.pop_back();
+      return true;
+    }
+
+    // Okay, we succeeded in evaluating this control flow. See if we have
+    // executed the new block before. If so, we have a looping function,
+    // which we cannot evaluate in reasonable time.
+    if (!ExecutedBlocks.insert(NextBB).second)
+      return false; // looped!
+
+    // Okay, we have never been in this block before. Check to see if there
+    // are any PHI nodes. If so, evaluate them with information about where
+    // we came from.
+    PHINode *PN = nullptr;
+    for (CurInst = NextBB->begin();
+         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+    // Advance to the next block.
+    CurBB = NextBB;
+  }
+}
+
+/// Evaluate static constructors in the function, if we can. Return true if we
+/// can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
+                                      const TargetLibraryInfo *TLI) {
+  // Call the function.
+  Evaluator Eval(DL, TLI);
+  Constant *RetValDummy;
+  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
+                                           SmallVector<Constant*, 0>());
+
+  if (EvalSuccess) {
+    ++NumCtorsEvaluated;
+
+    // We succeeded at evaluation: commit the result. 
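+    // Illustrative example: for file-scope C++ such as
+    //   int g = compute();   // compute() fully evaluable at compile time
+    // the mutated-memory map ends up holding @g -> <computed constant>, and
+    // committing it rewrites @g's initializer so the ctor body becomes
+    // unnecessary.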
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " << Eval.getMutatedMemory().size() + << " stores.\n"); + for (DenseMap<Constant*, Constant*>::const_iterator I = + Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); + I != E; ++I) + CommitValueTo(I->second, I->first); + for (GlobalVariable *GV : Eval.getInvariants()) + GV->setConstant(true); + } + + return EvalSuccess; +} + +static int compareNames(Constant *const *A, Constant *const *B) { + return (*A)->stripPointerCasts()->getName().compare( + (*B)->stripPointerCasts()->getName()); +} + +static void setUsedInitializer(GlobalVariable &V, + const SmallPtrSet<GlobalValue *, 8> &Init) { + if (Init.empty()) { + V.eraseFromParent(); + return; + } + + // Type of pointer to the array of pointers. + PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); + + SmallVector<llvm::Constant *, 8> UsedArray; + for (GlobalValue *GV : Init) { + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); + UsedArray.push_back(Cast); + } + // Sort to get deterministic order. + array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); + ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size()); + + Module *M = V.getParent(); + V.removeFromParent(); + GlobalVariable *NV = + new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), ""); + NV->takeName(&V); + NV->setSection("llvm.metadata"); + delete &V; +} + +namespace { +/// An easy to access representation of llvm.used and llvm.compiler.used. +class LLVMUsed { + SmallPtrSet<GlobalValue *, 8> Used; + SmallPtrSet<GlobalValue *, 8> CompilerUsed; + GlobalVariable *UsedV; + GlobalVariable *CompilerUsedV; + +public: + LLVMUsed(Module &M) { + UsedV = collectUsedGlobalVariables(M, Used, false); + CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true); + } + typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator; + typedef iterator_range<iterator> used_iterator_range; + iterator usedBegin() { return Used.begin(); } + iterator usedEnd() { return Used.end(); } + used_iterator_range used() { + return used_iterator_range(usedBegin(), usedEnd()); + } + iterator compilerUsedBegin() { return CompilerUsed.begin(); } + iterator compilerUsedEnd() { return CompilerUsed.end(); } + used_iterator_range compilerUsed() { + return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); + } + bool usedCount(GlobalValue *GV) const { return Used.count(GV); } + bool compilerUsedCount(GlobalValue *GV) const { + return CompilerUsed.count(GV); + } + bool usedErase(GlobalValue *GV) { return Used.erase(GV); } + bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } + bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } + bool compilerUsedInsert(GlobalValue *GV) { + return CompilerUsed.insert(GV).second; + } + + void syncVariablesAndSets() { + if (UsedV) + setUsedInitializer(*UsedV, Used); + if (CompilerUsedV) + setUsedInitializer(*CompilerUsedV, CompilerUsed); + } +}; +} + +static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { + if (GA.use_empty()) // No use at all. + return false; + + assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && + "We should have removed the duplicated " + "element from llvm.compiler.used"); + if (!GA.hasOneUse()) + // Strictly more than one use. So at least one is not in llvm.used and + // llvm.compiler.used. + return true; + + // Exactly one use. 
Check if it is in llvm.used or llvm.compiler.used. + return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); +} + +static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, + const LLVMUsed &U) { + unsigned N = 2; + assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) && + "We should have removed the duplicated " + "element from llvm.compiler.used"); + if (U.usedCount(&V) || U.compilerUsedCount(&V)) + ++N; + return V.hasNUsesOrMore(N); +} + +static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) { + if (!GA.hasLocalLinkage()) + return true; + + return U.usedCount(&GA) || U.compilerUsedCount(&GA); +} + +static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, + bool &RenameTarget) { + RenameTarget = false; + bool Ret = false; + if (hasUseOtherThanLLVMUsed(GA, U)) + Ret = true; + + // If the alias is externally visible, we may still be able to simplify it. + if (!mayHaveOtherReferences(GA, U)) + return Ret; + + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + Constant *Aliasee = GA.getAliasee(); + GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); + if (!Target->hasLocalLinkage()) + return Ret; + + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U)) + return Ret; + + RenameTarget = true; + return true; +} + +bool GlobalOpt::OptimizeGlobalAliases(Module &M) { + bool Changed = false; + LLVMUsed Used(M); + + for (GlobalValue *GV : Used.used()) + Used.compilerUsedErase(GV); + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E;) { + GlobalAlias *J = &*I++; + + // Aliases without names cannot be referenced outside this module. + if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) + J->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*J)) { + Changed = true; + continue; + } + + // If the aliasee may change at link time, nothing can be done - bail out. + if (J->mayBeOverridden()) + continue; + + Constant *Aliasee = J->getAliasee(); + GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); + // We can't trivially replace the alias with the aliasee if the aliasee is + // non-trivial in some way. + // TODO: Try to handle non-zero GEPs of local aliasees. + if (!Target) + continue; + Target->removeDeadConstantUsers(); + + // Make all users of the alias use the aliasee instead. + bool RenameTarget; + if (!hasUsesToReplace(*J, Used, RenameTarget)) + continue; + + J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); + ++NumAliasesResolved; + Changed = true; + + if (RenameTarget) { + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(&*J); + Target->setLinkage(J->getLinkage()); + Target->setVisibility(J->getVisibility()); + Target->setDLLStorageClass(J->getDLLStorageClass()); + + if (Used.usedErase(&*J)) + Used.usedInsert(Target); + + if (Used.compilerUsedErase(&*J)) + Used.compilerUsedInsert(Target); + } else if (mayHaveOtherReferences(*J, Used)) + continue; + + // Delete the alias. 
+ M.getAliasList().erase(J); + ++NumAliasesRemoved; + Changed = true; + } + + Used.syncVariablesAndSets(); + + return Changed; +} + +static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::cxa_atexit)) + return nullptr; + + Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); + + if (!Fn) + return nullptr; + + FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return nullptr; + + return Fn; +} + +/// Returns whether the given function is an empty C++ destructor and can +/// therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and +/// other side-effect free instructions. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet<const Function *, 8> &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore debug intrinsics. + if (isa<DbgInfoIntrinsic>(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn).second) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa<ReturnInst>(*I)) + return true; // We're done. + else if (I->mayHaveSideEffects()) + return false; // Destructor with side effects, bail. + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); + I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. 
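+    // Illustrative sketch (mangled names hypothetical):
+    //   call i32 @__cxa_atexit(void (i8*)* @_ZN1AD2Ev, i8* @a, i8* @__dso_handle)
+    // can be deleted outright once ~A() is known to be empty.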
+ CallInst *CI = dyn_cast<CallInst>(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet<const Function *, 8> CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + +bool GlobalOpt::runOnModule(Module &M) { + bool Changed = false; + + auto &DL = M.getDataLayout(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + bool LocalChange = true; + while (LocalChange) { + LocalChange = false; + + NotDiscardableComdats.clear(); + for (const GlobalVariable &GV : M.globals()) + if (const Comdat *C = GV.getComdat()) + if (!GV.isDiscardableIfUnused() || !GV.use_empty()) + NotDiscardableComdats.insert(C); + for (Function &F : M) + if (const Comdat *C = F.getComdat()) + if (!F.isDefTriviallyDead()) + NotDiscardableComdats.insert(C); + for (GlobalAlias &GA : M.aliases()) + if (const Comdat *C = GA.getComdat()) + if (!GA.isDiscardableIfUnused() || !GA.use_empty()) + NotDiscardableComdats.insert(C); + + // Delete functions that are trivially dead, ccc -> fastcc + LocalChange |= OptimizeFunctions(M); + + // Optimize global_ctors list. + LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { + return EvaluateStaticConstructor(F, DL, TLI); + }); + + // Optimize non-address-taken globals. + LocalChange |= OptimizeGlobalVars(M); + + // Resolve aliases, when possible. + LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors if they are not removed + // already. + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); + if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + + Changed |= LocalChange; + } + + // TODO: Move all global ctors functions to the end of the module for code + // layout. + + return Changed; +} + diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp new file mode 100644 index 0000000..af541d1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -0,0 +1,279 @@ +//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an _extremely_ simple interprocedural constant +// propagation pass. It could certainly be improved in many different ways, +// like using a worklist. This pass makes arguments dead, but does not remove +// them. The existing dead argument elimination pass should be run after this +// to clean up the mess. 
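+//
+// An illustrative example (hypothetical IR): given
+//   define internal i32 @f(i32 %x) { ... }
+// where every call site passes the constant 42, uses of %x inside @f are
+// replaced by 42. Likewise, if @f provably always returns the same constant
+// (or one of its own arguments), each caller is rewritten to use that value
+// directly instead of the call's result.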
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "ipconstprop" + +STATISTIC(NumArgumentsProped, "Number of args turned into constants"); +STATISTIC(NumReturnValProped, "Number of return values turned into constants"); + +namespace { + /// IPCP - The interprocedural constant propagation pass + /// + struct IPCP : public ModulePass { + static char ID; // Pass identification, replacement for typeid + IPCP() : ModulePass(ID) { + initializeIPCPPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + private: + bool PropagateConstantsIntoArguments(Function &F); + bool PropagateConstantReturn(Function &F); + }; +} + +char IPCP::ID = 0; +INITIALIZE_PASS(IPCP, "ipconstprop", + "Interprocedural constant propagation", false, false) + +ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } + +bool IPCP::runOnModule(Module &M) { + bool Changed = false; + bool LocalChange = true; + + // FIXME: instead of using smart algorithms, we just iterate until we stop + // making changes. + while (LocalChange) { + LocalChange = false; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + // Delete any klingons. + I->removeDeadConstantUsers(); + if (I->hasLocalLinkage()) + LocalChange |= PropagateConstantsIntoArguments(*I); + Changed |= PropagateConstantReturn(*I); + } + Changed |= LocalChange; + } + return Changed; +} + +/// PropagateConstantsIntoArguments - Look at all uses of the specified +/// function. If all uses are direct call sites, and all pass a particular +/// constant in for an argument, propagate that constant in as the argument. +/// +bool IPCP::PropagateConstantsIntoArguments(Function &F) { + if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. + + // For each argument, keep track of its constant value and whether it is a + // constant or not. The bool is driven to true when found to be non-constant. + SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants; + ArgumentConstants.resize(F.arg_size()); + + unsigned NumNonconstant = 0; + for (Use &U : F.uses()) { + User *UR = U.getUser(); + // Ignore blockaddress uses. + if (isa<BlockAddress>(UR)) continue; + + // Used by a non-instruction, or not the callee of a function, do not + // transform. + if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR)) + return false; + + CallSite CS(cast<Instruction>(UR)); + if (!CS.isCallee(&U)) + return false; + + // Check out all of the potentially constant arguments. Note that we don't + // inspect varargs here. + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; + ++i, ++AI, ++Arg) { + + // If this argument is known non-constant, ignore it. + if (ArgumentConstants[i].second) + continue; + + Constant *C = dyn_cast<Constant>(*AI); + if (C && ArgumentConstants[i].first == nullptr) { + ArgumentConstants[i].first = C; // First constant seen. + } else if (C && ArgumentConstants[i].first == C) { + // Still the constant value we think it is. + } else if (*AI == &*Arg) { + // Ignore recursive calls passing argument down. 
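+        // (For instance, @f passing its own parameter %x straight into a
+        // recursive call of @f does not make %x non-constant; illustrative.)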
+ } else { + // Argument became non-constant. If all arguments are non-constant now, + // give up on this function. + if (++NumNonconstant == ArgumentConstants.size()) + return false; + ArgumentConstants[i].second = true; + } + } + } + + // If we got to this point, there is a constant argument! + assert(NumNonconstant != ArgumentConstants.size()); + bool MadeChange = false; + Function::arg_iterator AI = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { + // Do we have a constant argument? + if (ArgumentConstants[i].second || AI->use_empty() || + AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) + continue; + + Value *V = ArgumentConstants[i].first; + if (!V) V = UndefValue::get(AI->getType()); + AI->replaceAllUsesWith(V); + ++NumArgumentsProped; + MadeChange = true; + } + return MadeChange; +} + + +// Check to see if this function returns one or more constants. If so, replace +// all callers that use those return values with the constant value. This will +// leave in the actual return values and instructions, but deadargelim will +// clean that up. +// +// Additionally if a function always returns one of its arguments directly, +// callers will be updated to use the value they pass in directly instead of +// using the return value. +bool IPCP::PropagateConstantReturn(Function &F) { + if (F.getReturnType()->isVoidTy()) + return false; // No return value. + + // If this function could be overridden later in the link stage, we can't + // propagate information about its results into callers. + if (F.mayBeOverridden()) + return false; + + // Check to see if this function returns a constant. + SmallVector<Value *,4> RetVals; + StructType *STy = dyn_cast<StructType>(F.getReturnType()); + if (STy) + for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) + RetVals.push_back(UndefValue::get(STy->getElementType(i))); + else + RetVals.push_back(UndefValue::get(F.getReturnType())); + + unsigned NumNonConstant = 0; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + for (unsigned i = 0, e = RetVals.size(); i != e; ++i) { + // Already found conflicting return values? + Value *RV = RetVals[i]; + if (!RV) + continue; + + // Find the returned value + Value *V; + if (!STy) + V = RI->getOperand(0); + else + V = FindInsertedValue(RI->getOperand(0), i); + + if (V) { + // Ignore undefs, we can change them into anything + if (isa<UndefValue>(V)) + continue; + + // Try to see if all the rets return the same constant or argument. + if (isa<Constant>(V) || isa<Argument>(V)) { + if (isa<UndefValue>(RV)) { + // No value found yet? Try the current one. + RetVals[i] = V; + continue; + } + // Returning the same value? Good. + if (RV == V) + continue; + } + } + // Different or no known return value? Don't propagate this return + // value. + RetVals[i] = nullptr; + // All values non-constant? Stop looking. + if (++NumNonConstant == RetVals.size()) + return false; + } + } + + // If we got here, the function returns at least one constant value. Loop + // over all users, replacing any uses of the return value with the returned + // constant. + bool MadeChange = false; + for (Use &U : F.uses()) { + CallSite CS(U.getUser()); + Instruction* Call = CS.getInstruction(); + + // Not a call instruction or a call instruction that's not calling F + // directly? + if (!Call || !CS.isCallee(&U)) + continue; + + // Call result not used? 
+ if (Call->use_empty()) + continue; + + MadeChange = true; + + if (!STy) { + Value* New = RetVals[0]; + if (Argument *A = dyn_cast<Argument>(New)) + // Was an argument returned? Then find the corresponding argument in + // the call instruction and use that. + New = CS.getArgument(A->getArgNo()); + Call->replaceAllUsesWith(New); + continue; + } + + for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) { + Instruction *Ins = cast<Instruction>(*I); + + // Increment now, so we can remove the use + ++I; + + // Find the index of the retval to replace with + int index = -1; + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins)) + if (EV->hasIndices()) + index = *EV->idx_begin(); + + // If this use uses a specific return value, and we have a replacement, + // replace it. + if (index != -1) { + Value *New = RetVals[index]; + if (New) { + if (Argument *A = dyn_cast<Argument>(New)) + // Was an argument returned? Then find the corresponding argument in + // the call instruction and use that. + New = CS.getArgument(A->getArgNo()); + Ins->replaceAllUsesWith(New); + Ins->eraseFromParent(); + } + } + } + } + + if (MadeChange) ++NumReturnValProped; + return MadeChange; +} diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp new file mode 100644 index 0000000..7ea6c08 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -0,0 +1,118 @@ +//===-- IPO.cpp -----------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM +// intermediate representation. 
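+//
+// A minimal usage sketch of the C bindings (assuming an existing
+// LLVMModuleRef M):
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddGlobalOptimizerPass(PM);
+//   LLVMAddIPConstantPropagationPass(PM);
+//   LLVMRunPassManager(PM, M);
+//   LLVMDisposePassManager(PM);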
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Initialization.h" +#include "llvm-c/Transforms/IPO.h" +#include "llvm/InitializePasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/IPO.h" + +using namespace llvm; + +void llvm::initializeIPO(PassRegistry &Registry) { + initializeArgPromotionPass(Registry); + initializeConstantMergePass(Registry); + initializeCrossDSOCFIPass(Registry); + initializeDAEPass(Registry); + initializeDAHPass(Registry); + initializeForceFunctionAttrsLegacyPassPass(Registry); + initializeFunctionAttrsPass(Registry); + initializeGlobalDCEPass(Registry); + initializeGlobalOptPass(Registry); + initializeIPCPPass(Registry); + initializeAlwaysInlinerPass(Registry); + initializeSimpleInlinerPass(Registry); + initializeInferFunctionAttrsLegacyPassPass(Registry); + initializeInternalizePassPass(Registry); + initializeLoopExtractorPass(Registry); + initializeBlockExtractorPassPass(Registry); + initializeSingleLoopExtractorPass(Registry); + initializeLowerBitSetsPass(Registry); + initializeMergeFunctionsPass(Registry); + initializePartialInlinerPass(Registry); + initializePruneEHPass(Registry); + initializeStripDeadPrototypesLegacyPassPass(Registry); + initializeStripSymbolsPass(Registry); + initializeStripDebugDeclarePass(Registry); + initializeStripDeadDebugInfoPass(Registry); + initializeStripNonDebugSymbolsPass(Registry); + initializeBarrierNoopPass(Registry); + initializeEliminateAvailableExternallyPass(Registry); + initializeSampleProfileLoaderPass(Registry); + initializeFunctionImportPassPass(Registry); +} + +void LLVMInitializeIPO(LLVMPassRegistryRef R) { + initializeIPO(*unwrap(R)); +} + +void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createArgumentPromotionPass()); +} + +void LLVMAddConstantMergePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createConstantMergePass()); +} + +void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadArgEliminationPass()); +} + +void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionAttrsPass()); +} + +void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createFunctionInliningPass()); +} + +void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(llvm::createAlwaysInlinerPass()); +} + +void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGlobalDCEPass()); +} + +void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGlobalOptimizerPass()); +} + +void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPConstantPropagationPass()); +} + +void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPruneEHPass()); +} + +void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPSCCPPass()); +} + +void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { + std::vector<const char *> Export; + if (AllButMain) + Export.push_back("main"); + unwrap(PM)->add(createInternalizePass(Export)); +} + +void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createStripDeadPrototypesPass()); +} + +void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createStripSymbolsPass()); +} diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp new file mode 100644 index 0000000..d02c861 --- /dev/null +++ 
b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -0,0 +1,937 @@ +//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "inferattrs" + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned n) { + if (F.doesNotCapture(n)) + return false; + F.setDoesNotCapture(n); + ++NumNoCapture; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned n) { + if (F.onlyReadsMemory(n)) + return false; + F.setOnlyReadsMemory(n); + ++NumReadOnlyArg; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned n) { + if (F.doesNotAlias(n)) + return false; + F.setDoesNotAlias(n); + ++NumNoAlias; + return true; +} + +/// Analyze the name and prototype of the given function and set any applicable +/// attributes. +/// +/// Returns true if any attributes were set and false otherwise. 
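+///
+/// For example (illustrative only): given nothing but the declaration
+///   declare i64 @strlen(i8*)
+/// the switch below marks the function 'readonly' and 'nounwind' and its
+/// first argument 'nocapture', as if the declaration had been written
+///   declare i64 @strlen(i8* nocapture) nounwind readonly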
+static bool inferPrototypeAttributes(Function &F, + const TargetLibraryInfo &TLI) { + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strcmp: // 0,1 + case LibFunc::strspn: // 0,1 + case LibFunc::strncmp: // 0,1 + case LibFunc::strcspn: // 0,1 + case LibFunc::strcoll: // 0,1 + case LibFunc::strcasecmp: // 0,1 + case LibFunc::strncasecmp: // + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || 
!FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::sscanf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::sprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::system: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. 
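+    // (Hence, unlike most entries in this switch, setDoesNotThrow is
+    // deliberately not applied here, nor for the other cancellation points
+    // handled below such as read, write, open, pread, and pwrite.)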
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::mkdir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::read: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::rewind: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::rmdir: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::rename: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::chmod: + case LibFunc::chown: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= 
setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + return Changed; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc::fread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fputs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fscanf: + case LibFunc::fprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= 
setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::gets: + case LibFunc::getchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::uname: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::unlink: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::pread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pwrite" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vsscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + return Changed; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return 
false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::gettimeofday: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. + return false; + } +} + +static bool inferAllPrototypeAttributes(Module &M, + const TargetLibraryInfo &TLI) { + bool Changed = false; + + for (Function &F : M.functions()) + // We only infer things using the prototype if the definition isn't around + // to analyze directly. + if (F.isDeclaration()) + Changed |= inferPrototypeAttributes(F, TLI); + + return Changed; +} + +PreservedAnalyses InferFunctionAttrsPass::run(Module &M, + AnalysisManager<Module> *AM) { + auto &TLI = AM->getResult<TargetLibraryAnalysis>(M); + + if (!inferAllPrototypeAttributes(M, TLI)) + // If we didn't infer anything, preserve all analyses. + return PreservedAnalyses::all(); + + // Otherwise, we may have changed fundamental function attributes, so clear + // out all the passes. 
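+  //
+  // (Illustrative sketch only; the exact new-pass-manager plumbing may
+  // differ:) a client would typically schedule this as
+  //   ModulePassManager MPM;
+  //   MPM.addPass(InferFunctionAttrsPass());
+  //   PreservedAnalyses PA = MPM.run(M, &AM);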
+ return PreservedAnalyses::none(); +} + +namespace { +struct InferFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + InferFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeInferFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + } + + bool runOnModule(Module &M) override { + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + return inferAllPrototypeAttributes(M, TLI); + } +}; +} + +char InferFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) + +Pass *llvm::createInferFunctionAttrsLegacyPass() { + return new InferFunctionAttrsLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp new file mode 100644 index 0000000..1704bfe --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp @@ -0,0 +1,100 @@ +//===- InlineAlways.cpp - Code to inline always_inline functions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a custom inliner that handles only functions that +// are marked as "always inline". +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO/InlinerPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline" + +namespace { + +/// \brief Inliner pass which only handles "always inline" functions. +class AlwaysInliner : public Inliner { + +public: + // Use extremely low threshold. 
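+  // (The -2000000000 below is only a sentinel: getInlineCost is overridden
+  // to return getAlways() or getNever(), so no ordinary cost estimate is
+  // ever compared against this threshold.)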
+  AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
+    initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  AlwaysInliner(bool InsertLifetime)
+      : Inliner(ID, -2000000000, InsertLifetime) {
+    initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+
+  InlineCost getInlineCost(CallSite CS) override;
+
+  using llvm::Pass::doFinalization;
+  bool doFinalization(CallGraph &CG) override {
+    return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+  }
+};
+
+}
+
+char AlwaysInliner::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
+
+Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
+  return new AlwaysInliner(InsertLifetime);
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+
+  // Only inline direct calls to functions with always-inline attributes
+  // that are viable for inlining. FIXME: We shouldn't even get here for
+  // declarations.
+  if (Callee && !Callee->isDeclaration() &&
+      CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
+    return InlineCost::getAlways();
+
+  return InlineCost::getNever();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
new file mode 100644
index 0000000..45609f8
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -0,0 +1,110 @@
+//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bottom-up inlining of functions into their callers.
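+//
+// (Illustrative:) createFunctionInliningPass(OptLevel, SizeOptLevel) below
+// picks the cost threshold via computeThresholdFromOptLevels: -O3 yields
+// 275, -Os (SizeOptLevel == 1) yields 75, -Oz (SizeOptLevel == 2) yields
+// 25, and everything else gets the default of 225.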
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/InlinerPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline" + +namespace { + +/// \brief Actual inliner pass implementation. +/// +/// The common implementation of the inlining logic is shared between this +/// inliner pass and the always inliner pass. The two passes use different cost +/// analyses to determine when to inline. +class SimpleInliner : public Inliner { + +public: + SimpleInliner() : Inliner(ID) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + + SimpleInliner(int Threshold) + : Inliner(ID, Threshold, /*InsertLifetime*/ true) { + initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification, replacement for typeid + + InlineCost getInlineCost(CallSite CS) override { + Function *Callee = CS.getCalledFunction(); + TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT); + } + + bool runOnSCC(CallGraphSCC &SCC) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + TargetTransformInfoWrapperPass *TTIWP; +}; + +static int computeThresholdFromOptLevels(unsigned OptLevel, + unsigned SizeOptLevel) { + if (OptLevel > 2) + return 275; + if (SizeOptLevel == 1) // -Os + return 75; + if (SizeOptLevel == 2) // -Oz + return 25; + return 225; +} + +} // end anonymous namespace + +char SimpleInliner::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(SimpleInliner, "inline", + "Function Integration/Inlining", false, false) + +Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } + +Pass *llvm::createFunctionInliningPass(int Threshold) { + return new SimpleInliner(Threshold); +} + +Pass *llvm::createFunctionInliningPass(unsigned OptLevel, + unsigned SizeOptLevel) { + return new SimpleInliner( + computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); +} + +bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { + TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); + return Inliner::runOnSCC(SCC); +} + +void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + Inliner::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp new file mode 100644 index 0000000..bbe5f876 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -0,0 +1,741 @@ +//===- Inliner.cpp - Code common to all inliners --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls while updating the call graph. The decisions about which
+// calls are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+
+// This weirdly named statistic tracks the number of times that, when
+// attempting to inline a function A into B, we analyze B's callers to see
+// whether inlining into *them* would be more profitable and would be blocked
+// by this inline step.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+        cl::desc("Control the amount of inlining to perform (default = 225)"));
+
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+              cl::desc("Threshold for inlining functions with inline hint"));
+
+// We introduce this threshold to help performance of instrumentation-based
+// PGO before we actually hook up the inliner with analysis passes such as
+// BPI and BFI.
+static cl::opt<int>
+ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
+              cl::desc("Threshold for inlining functions with cold attribute"));
+
+// Threshold to use when optsize is specified (and there is no
+// -inline-threshold).
+const int OptSizeThreshold = 75;
+
+Inliner::Inliner(char &ID)
+    : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
+}
+
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
+    : CallGraphSCCPass(ID),
+      InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
+                                                          : Threshold),
+      InsertLifetime(InsertLifetime) {}
+
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+  CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR.
The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other functions inlined into the caller. If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. +static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory, bool InsertLifetime) { + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + + // We need to manually construct BasicAA directly in order to disable + // its use of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR)); + + // Try to inline the function. Get the list of static allocas that were + // inlined. + if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) + return false; + + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); + + // Look at all of the allocas that we inlined through this call site. If we + // have already inlined other allocas through other calls into this function, + // then we know that they have disjoint lifetimes and that we can merge them. + // + // There are many heuristics possible for merging these allocas, and the + // different options have different tradeoffs. One thing that we *really* + // don't want to hurt is SRoA: once inlining happens, often allocas are no + // longer address taken and so they can be promoted. + // + // Our "solution" for that is to only merge allocas whose outermost type is an + // array type. These are usually not promoted because someone is using a + // variable index into them. These are also often the most important ones to + // merge. + // + // A better solution would be to have real memory lifetime markers in the IR + // and not have the inliner do any merging of allocas at all. This would + // allow the backend to do proper stack slot coloring of all allocas that + // *actually make it to the backend*, which is really what we want. + // + // Because we don't have this information, we do this simple and useful hack. + // + SmallPtrSet<AllocaInst*, 16> UsedAllocas; + + // When processing our SCC, check to see if CS was inlined from some other + // call site. For example, if we're processing "A" in this code: + // A() { B() } + // B() { x = alloca ... C() } + // C() { y = alloca ... } + // Assume that C was not inlined into B initially, and so we're processing A + // and decide to inline B into A. Doing this makes an alloca available for + // reuse and makes a callsite (C) available for inlining. When we process + // the C call site we don't want to do any alloca merging between X and Y + // because their scopes are not disjoint. We could make this smarter by + // keeping track of the inline history for each alloca in the + // InlinedArrayAllocas but this isn't likely to be a significant win. + if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. + return true; + + // Loop over all the allocas we have so far and see if they can be merged with + // a previously inlined alloca. If not, remember that we had it. 
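+  // (Illustrative:) if an earlier inline into this caller contributed
+  //   %buf = alloca [16 x i32]
+  // and this inline step contributes another static alloca of type
+  // [16 x i32], the loop below RAUWs the new alloca to %buf rather than
+  // keeping both, since their lifetimes are known to be disjoint.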
+  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
+       AllocaNo != e; ++AllocaNo) {
+    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+    // Don't bother trying to merge array allocations (they will usually be
+    // canonicalized to be an allocation *of* an array), or allocations whose
+    // type is not itself an array (because we're afraid of pessimizing SRoA).
+    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    if (!ATy || AI->isArrayAllocation())
+      continue;
+
+    // Get the list of all available allocas for this array type.
+    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined, so we use the
+    // 'UsedAllocas' set to keep track of which "available" allocas are being
+    // used by this function. Also, AllocasForType can be empty of course!
+    bool MergedAwayAlloca = false;
+    for (AllocaInst *AvailableAlloca : AllocasForType) {
+
+      unsigned Align1 = AI->getAlignment(),
+               Align2 = AvailableAlloca->getAlignment();
+
+      // The available alloca has to be in the right function, not in some
+      // other function in this SCC.
+      if (AvailableAlloca->getParent() != AI->getParent())
+        continue;
+
+      // If the inlined function already uses this alloca then we can't reuse
+      // it.
+      if (!UsedAllocas.insert(AvailableAlloca).second)
+        continue;
+
+      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and declare
+      // success!
+      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+                   << *AvailableAlloca << '\n');
+
+      // Move affected dbg.declare calls immediately after the new alloca to
+      // avoid the situation when a dbg.declare precedes its alloca.
+      if (auto *L = LocalAsMetadata::getIfExists(AI))
+        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
+          for (User *U : MDV->users())
+            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
+              DDI->moveBefore(AvailableAlloca->getNextNode());
+
+      AI->replaceAllUsesWith(AvailableAlloca);
+
+      if (Align1 != Align2) {
+        if (!Align1 || !Align2) {
+          const DataLayout &DL = Caller->getParent()->getDataLayout();
+          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
+
+          Align1 = Align1 ? Align1 : TypeAlign;
+          Align2 = Align2 ? Align2 : TypeAlign;
+        }
+
+        if (Align1 > Align2)
+          AvailableAlloca->setAlignment(AI->getAlignment());
+      }
+
+      AI->eraseFromParent();
+      MergedAwayAlloca = true;
+      ++NumMergedAllocas;
+      IFI.StaticAllocas[AllocaNo] = nullptr;
+      break;
+    }
+
+    // If we already nuked the alloca, we're done with it.
+    if (MergedAwayAlloca)
+      continue;
+
+    // If we were unable to merge away the alloca either because there are no
+    // allocas of the right type available or because we reused them all
+    // already, remember that this alloca came from an inlined function and
+    // mark it used so we don't reuse it for other allocas from this inline
+    // operation.
+    AllocasForType.push_back(AI);
+    UsedAllocas.insert(AI);
+  }
+
+  return true;
+}
+
+unsigned Inliner::getInlineThreshold(CallSite CS) const {
+  int Threshold = InlineThreshold; // -inline-threshold or else selected by
+                                   // overall opt level
+
+  // If -inline-threshold is not given, listen to the optsize attribute when it
+  // would decrease the threshold.
+  Function *Caller = CS.getCaller();
+  bool OptSize = Caller && !Caller->isDeclaration() &&
+                 // FIXME: Use Function::optForSize().
+ Caller->hasFnAttribute(Attribute::OptimizeForSize); + if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && + OptSizeThreshold < Threshold) + Threshold = OptSizeThreshold; + + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + return Threshold; + + // If profile information is available, use that to adjust threshold of hot + // and cold functions. + // FIXME: The heuristic used below for determining hotness and coldness are + // based on preliminary SPEC tuning and may not be optimal. Replace this with + // a well-tuned heuristic based on *callsite* hotness and not callee hotness. + uint64_t FunctionCount = 0, MaxFunctionCount = 0; + bool HasPGOCounts = false; + if (Callee->getEntryCount() && + Callee->getParent()->getMaximumFunctionCount()) { + HasPGOCounts = true; + FunctionCount = Callee->getEntryCount().getValue(); + MaxFunctionCount = + Callee->getParent()->getMaximumFunctionCount().getValue(); + } + + // Listen to the inlinehint attribute or profile based hotness information + // when it would increase the threshold and the caller does not need to + // minimize its size. + bool InlineHint = + Callee->hasFnAttribute(Attribute::InlineHint) || + (HasPGOCounts && + FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); + if (InlineHint && HintThreshold > Threshold && + !Caller->hasFnAttribute(Attribute::MinSize)) + Threshold = HintThreshold; + + // Listen to the cold attribute or profile based coldness information + // when it would decrease the threshold. + bool ColdCallee = + Callee->hasFnAttribute(Attribute::Cold) || + (HasPGOCounts && + FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); + // Command line argument for InlineLimit will override the default + // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, + // do not use the default cold threshold even if it is smaller. + if ((InlineLimit.getNumOccurrences() == 0 || + ColdThreshold.getNumOccurrences() > 0) && ColdCallee && + ColdThreshold < Threshold) + Threshold = ColdThreshold; + + return Threshold; +} + +static void emitAnalysis(CallSite CS, const Twine &Msg) { + Function *Caller = CS.getCaller(); + LLVMContext &Ctx = Caller->getContext(); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); +} + +/// Return true if the inliner should attempt to inline at the given CallSite. 
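+///
+/// (Illustrative:) a candidate whose InlineCost works out to cost=120
+/// against a threshold of 225 (recovered below as IC.getCostDelta() +
+/// IC.getCost()) passes the boolean test on IC and is accepted; the same
+/// call with cost=300 would be rejected as too costly.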
+bool Inliner::shouldInline(CallSite CS) { + InlineCost IC = getInlineCost(CS); + + if (IC.isAlways()) { + DEBUG(dbgs() << " Inlining: cost=always" + << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + + " should always be inlined (cost=always)"); + return true; + } + + if (IC.isNever()) { + DEBUG(dbgs() << " NOT Inlining: cost=never" + << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " should never be inlined (cost=never)")); + return false; + } + + Function *Caller = CS.getCaller(); + if (!IC) { + DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " too costly to inline (cost=") + + Twine(IC.getCost()) + ", threshold=" + + Twine(IC.getCostDelta() + IC.getCost()) + ")"); + return false; + } + + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. + if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) { + int TotalSecondaryCost = 0; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); + // This bool tracks what happens if we do NOT inline C into B. + bool callerWillBeRemoved = Caller->hasLocalLinkage(); + // This bool tracks what happens if we DO inline C into B. + bool inliningPreventsSomeOuterInline = false; + for (User *U : Caller->users()) { + CallSite CS2(U); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2.getCalledFunction() != Caller) { + callerWillBeRemoved = false; + continue; + } + + InlineCost IC2 = getInlineCost(CS2); + ++NumCallerCallersAnalyzed; + if (!IC2) { + callerWillBeRemoved = false; + continue; + } + if (IC2.isAlways()) + continue; + + // See if inlining or original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { + inliningPreventsSomeOuterInline = true; + TotalSecondaryCost += IC2.getCost(); + } + } + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. 
+ if (callerWillBeRemoved && !Caller->use_empty()) + TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << IC.getCost() << + ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis( + CS, Twine("Not inlining. Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); + return false; + } + } + + DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", Call: " << *CS.getInstruction() << '\n'); + emitAnalysis( + CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + + CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + + " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); + return true; +} + +/// Return true if the specified inline history ID +/// indicates an inline history that includes the specified function. +static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, + const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { + while (InlineHistoryID != -1) { + assert(unsigned(InlineHistoryID) < InlineHistory.size() && + "Invalid inline history ID"); + if (InlineHistory[InlineHistoryID].first == F) + return true; + InlineHistoryID = InlineHistory[InlineHistoryID].second; + } + return false; +} + +bool Inliner::runOnSCC(CallGraphSCC &SCC) { + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + ACT = &getAnalysis<AssumptionCacheTracker>(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + SmallPtrSet<Function*, 8> SCCFunctions; + DEBUG(dbgs() << "Inliner visiting SCC:"); + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + if (F) SCCFunctions.insert(F); + DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); + } + + // Scan through and identify all call sites ahead of time so that we only + // inline call sites in the original functions, not call sites that result + // from inlining other functions. + SmallVector<std::pair<CallSite, int>, 16> CallSites; + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector<std::pair<Function*, int>, 8> InlineHistory; + + for (CallGraphNode *Node : SCC) { + Function *F = Node->getFunction(); + if (!F) continue; + + for (BasicBlock &BB : *F) + for (Instruction &I : BB) { + CallSite CS(cast<Value>(&I)); + // If this isn't a call, or it is a call to an intrinsic, it can + // never be inlined. + if (!CS || isa<IntrinsicInst>(I)) + continue; + + // If this is a direct call to an external function, we can never inline + // it. If it is an indirect call, inlining may resolve it to be a + // direct call, so we keep it. + if (Function *Callee = CS.getCalledFunction()) + if (Callee->isDeclaration()) + continue; + + CallSites.push_back(std::make_pair(CS, -1)); + } + } + + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); + + // If there are no calls in this function, exit early. + if (CallSites.empty()) + return false; + + // Now that we have all of the call sites, move the ones to functions in the + // current SCC to the end of the list. 
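+  // (Illustrative:) given CallSites = [f, g, h] where only g is defined in
+  // this SCC, the swap loop below leaves the non-SCC calls [f, h] in front
+  // of g, with FirstCallInSCC marking the boundary between the two groups.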
+  unsigned FirstCallInSCC = CallSites.size();
+  for (unsigned i = 0; i < FirstCallInSCC; ++i)
+    if (Function *F = CallSites[i].first.getCalledFunction())
+      if (SCCFunctions.count(F))
+        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+
+  InlinedArrayAllocasTy InlinedArrayAllocas;
+  InlineFunctionInfo InlineInfo(&CG, ACT);
+
+  // Now that we have all of the call sites, loop over them and inline them if
+  // it looks profitable to do so.
+  bool Changed = false;
+  bool LocalChange;
+  do {
+    LocalChange = false;
+    // We keep iterating in the outer do-while loop because inlining functions
+    // can cause indirect calls to become direct calls, exposing new
+    // candidates.
+    // CallSites may be modified inside the loop, so a range-based for loop
+    // cannot be used.
+    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+      CallSite CS = CallSites[CSi].first;
+
+      Function *Caller = CS.getCaller();
+      Function *Callee = CS.getCalledFunction();
+
+      // If this call site is dead and it is to a readonly function, we should
+      // just delete the call instead of trying to inline it, regardless of
+      // size. This happens because IPSCCP propagates the result out of the
+      // call and then we're left with the dead call.
+      if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
+        DEBUG(dbgs() << "    -> Deleting dead call: "
+                     << *CS.getInstruction() << "\n");
+        // Update the call graph by deleting the edge from Callee to Caller.
+        CG[Caller]->removeCallEdgeFor(CS);
+        CS.getInstruction()->eraseFromParent();
+        ++NumCallsDeleted;
+      } else {
+        // We can only inline direct calls to non-declarations.
+        if (!Callee || Callee->isDeclaration()) continue;
+
+        // If this call site was obtained by inlining another function, verify
+        // that the inline history for the call site does not include the
+        // callee itself. If it did, we would be recursively inlining the same
+        // function, which would expose the same call sites again and cause us
+        // to inline infinitely.
+        int InlineHistoryID = CallSites[CSi].second;
+        if (InlineHistoryID != -1 &&
+            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+          continue;
+
+        LLVMContext &CallerCtx = Caller->getContext();
+
+        // Get the DebugLoc to report; CS will be invalid after inlining.
+        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
+        // If the policy determines that we should inline this function,
+        // try to do so.
+        if (!shouldInline(CS)) {
+          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+                                       Twine(Callee->getName() +
+                                             " will not be inlined into " +
+                                             Caller->getName()));
+          continue;
+        }
+
+        // Attempt to inline the function.
+        if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
+                                  InlineHistoryID, InsertLifetime)) {
+          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+                                       Twine(Callee->getName() +
+                                             " will not be inlined into " +
+                                             Caller->getName()));
+          continue;
+        }
+        ++NumInlined;
+
+        // Report the inline decision.
+        emitOptimizationRemark(
+            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+            Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
+        // If inlining this function gave us any new call sites, throw them
+        // onto our worklist to process. They are useful inline candidates.
+        if (!InlineInfo.InlinedCalls.empty()) {
+          // Create a new inline history entry for this, so that we remember
+          // that these new callsites came about due to inlining Callee.
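+          // Illustrative (hypothetical functions): if inlining f into g
+          // created entry 0 = {f, -1}, and inlining g into h now exposes call
+          // sites that originally lived in f, those sites get a new entry
+          // {g, 0}. Walking that chain yields {g, f}, which is exactly what
+          // InlineHistoryIncludes consults to block recursive re-inlining.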
+          int NewHistoryID = InlineHistory.size();
+          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+          for (Value *Ptr : InlineInfo.InlinedCalls)
+            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+        }
+      }
+
+      // If we inlined or deleted the last possible call site to the function,
+      // delete the function body now.
+      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
+          // TODO: Can remove if in SCC now.
+          !SCCFunctions.count(Callee) &&
+
+          // The function may be apparently dead, but if there are indirect
+          // callgraph references to the node, we cannot delete it yet, this
+          // could invalidate the CGSCC iterator.
+          CG[Callee]->getNumReferences() == 0) {
+        DEBUG(dbgs() << "    -> Deleting dead function: "
+                     << Callee->getName() << "\n");
+        CallGraphNode *CalleeNode = CG[Callee];
+
+        // Remove any call graph edges from the callee to its callees.
+        CalleeNode->removeAllCalledFunctions();
+
+        // Remove the node for the callee from the call graph and delete it.
+        delete CG.removeFunctionFromModule(CalleeNode);
+        ++NumDeleted;
+      }
+
+      // Remove this call site from the list. If possible, use
+      // swap/pop_back for efficiency, but do not use it if doing so would
+      // move a call site to a function in this SCC before the
+      // 'FirstCallInSCC' barrier.
+      if (SCC.isSingular()) {
+        CallSites[CSi] = CallSites.back();
+        CallSites.pop_back();
+      } else {
+        CallSites.erase(CallSites.begin()+CSi);
+      }
+      --CSi;
+
+      Changed = true;
+      LocalChange = true;
+    }
+  } while (LocalChange);
+
+  return Changed;
+}
+
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
+bool Inliner::doFinalization(CallGraph &CG) {
+  return removeDeadFunctions(CG);
+}
+
+/// Remove dead functions that are not included in the DNR (Do Not Remove)
+/// list.
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+  SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+  SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
+  SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+
+  auto RemoveCGN = [&](CallGraphNode *CGN) {
+    // Remove any call graph edges from the function to its callees.
+    CGN->removeAllCalledFunctions();
+
+    // Remove any edges from the external node to the function's call graph
+    // node. These edges might have been made irrelevant due to
+    // optimization of the program.
+    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Queue the node; it is removed from the module and deleted below.
+    FunctionsToRemove.push_back(CGN);
+  };
+
+  // Scan for all of the functions, looking for ones that should now be removed
+  // from the program. Insert the dead ones in the FunctionsToRemove set.
+  for (const auto &I : CG) {
+    CallGraphNode *CGN = I.second.get();
+    Function *F = CGN->getFunction();
+    if (!F || F->isDeclaration())
+      continue;
+
+    // If we only care about always-inline functions, skip the others. This is
+    // a bit of a hack to share code between here and the InlineAlways pass.
+    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
+      continue;
+
+    // If the only remaining users of the function are dead constants, remove
+    // them.
+    F->removeDeadConstantUsers();
+
+    if (!F->isDefTriviallyDead())
+      continue;
+
+    // It is unsafe to drop a function with discardable linkage from a COMDAT
+    // without also dropping the other members of the COMDAT.
+    // The inliner doesn't visit non-function entities which are in COMDAT
+    // groups, so it is unsafe to do so *unless* the linkage is local.
+    if (!F->hasLocalLinkage()) {
+      if (const Comdat *C = F->getComdat()) {
+        --ComdatEntriesAlive[C];
+        DeadFunctionsInComdats.push_back(CGN);
+        continue;
+      }
+    }
+
+    RemoveCGN(CGN);
+  }
+  if (!DeadFunctionsInComdats.empty()) {
+    // Count up all the entities in COMDAT groups.
+    auto ComdatGroupReferenced = [&](const Comdat *C) {
+      auto I = ComdatEntriesAlive.find(C);
+      if (I != ComdatEntriesAlive.end())
+        ++(I->getSecond());
+    };
+    for (const Function &F : CG.getModule())
+      if (const Comdat *C = F.getComdat())
+        ComdatGroupReferenced(C);
+    for (const GlobalVariable &GV : CG.getModule().globals())
+      if (const Comdat *C = GV.getComdat())
+        ComdatGroupReferenced(C);
+    for (const GlobalAlias &GA : CG.getModule().aliases())
+      if (const Comdat *C = GA.getComdat())
+        ComdatGroupReferenced(C);
+    for (CallGraphNode *CGN : DeadFunctionsInComdats) {
+      Function *F = CGN->getFunction();
+      const Comdat *C = F->getComdat();
+      int NumAlive = ComdatEntriesAlive[C];
+      // We can remove functions in a COMDAT group if the entire group is dead.
+      assert(NumAlive >= 0);
+      if (NumAlive > 0)
+        continue;
+
+      RemoveCGN(CGN);
+    }
+  }
+
+  if (FunctionsToRemove.empty())
+    return false;
+
+  // Now that we know which functions to delete, do so. We didn't want to do
+  // this inline, because that would invalidate our CallGraph::iterator
+  // objects. :(
+  //
+  // Note that it doesn't matter that we are iterating over a non-stable order
+  // here to do this, it doesn't matter which order the functions are deleted
+  // in.
+  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+  FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+                                      FunctionsToRemove.end()),
+                          FunctionsToRemove.end());
+  for (CallGraphNode *CGN : FunctionsToRemove) {
+    delete CG.removeFunctionFromModule(CGN);
+    ++NumDeleted;
+  }
+  return true;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
new file mode 100644
index 0000000..21bb5d0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -0,0 +1,269 @@
+//===-- Internalize.cpp - Mark functions internal -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions and variables in the input module.
+// If the function or variable is not in the list of external names given to
+// the pass, it is marked as internal.
+//
+// This transformation would not be legal in a regular compilation, but it gets
+// extra information from the linker about what is safe.
+//
+// For example, internalizing a function with external linkage is safe only if
+// the linker tells us that the function is used solely from within this
+// module.
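+//
+// Illustrative invocation (the flag names are the cl::opt strings defined
+// below; the exact opt command line is an assumption, not part of this file):
+//
+//   opt -internalize -internalize-public-api-list=main in.bc -o out.bc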
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <fstream>
+#include <set>
+using namespace llvm;
+
+#define DEBUG_TYPE "internalize"
+
+STATISTIC(NumAliases  , "Number of aliases internalized");
+STATISTIC(NumFunctions, "Number of functions internalized");
+STATISTIC(NumGlobals  , "Number of global vars internalized");
+
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+        cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+        cl::desc("A list of symbol names to preserve"),
+        cl::CommaSeparated);
+
+namespace {
+  class InternalizePass : public ModulePass {
+    std::set<std::string> ExternalNames;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit InternalizePass();
+    explicit InternalizePass(ArrayRef<const char *> ExportList);
+    void LoadFile(const char *Filename);
+    bool maybeInternalize(GlobalValue &GV,
+                          const std::set<const Comdat *> &ExternalComdats);
+    void checkComdatVisibility(GlobalValue &GV,
+                               std::set<const Comdat *> &ExternalComdats);
+    bool runOnModule(Module &M) override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      AU.addPreserved<CallGraphWrapperPass>();
+    }
+  };
+} // end anonymous namespace
+
+char InternalizePass::ID = 0;
+INITIALIZE_PASS(InternalizePass, "internalize",
+                "Internalize Global Symbols", false, false)
+
+InternalizePass::InternalizePass() : ModulePass(ID) {
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+  if (!APIFile.empty()) // If a filename is specified, use it.
+    LoadFile(APIFile.c_str());
+  ExternalNames.insert(APIList.begin(), APIList.end());
+}
+
+InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
+    : ModulePass(ID) {
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
+  for (const char *Name : ExportList)
+    ExternalNames.insert(Name);
+}
+
+void InternalizePass::LoadFile(const char *Filename) {
+  // Load the APIFile...
+  std::ifstream In(Filename);
+  if (!In.good()) {
+    errs() << "WARNING: Internalize couldn't load file '" << Filename
+           << "'! Continuing as if it's empty.\n";
+    return; // Just continue as if the file were empty
+  }
+  while (In) {
+    std::string Symbol;
+    In >> Symbol;
+    if (!Symbol.empty())
+      ExternalNames.insert(Symbol);
+  }
+}
+
+static bool isExternallyVisible(const GlobalValue &GV,
+                                const std::set<std::string> &ExternalNames) {
+  // Function must be defined here.
+  if (GV.isDeclaration())
+    return true;
+
+  // Available externally is really just a "declaration with a body".
+  if (GV.hasAvailableExternallyLinkage())
+    return true;
+
+  // Assume that dllexported symbols are referenced elsewhere.
+  if (GV.hasDLLExportStorageClass())
+    return true;
+
+  // Marked to keep external?
+  if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+    return true;
+
+  return false;
+}
+
+// Internalize GV if it is possible to do so, i.e. it is not externally visible
+// and is not a member of an externally visible comdat.
+bool InternalizePass::maybeInternalize(
+    GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+  if (Comdat *C = GV.getComdat()) {
+    if (ExternalComdats.count(C))
+      return false;
+
+    // If a comdat is not externally visible we can drop it.
+    if (auto GO = dyn_cast<GlobalObject>(&GV))
+      GO->setComdat(nullptr);
+
+    if (GV.hasLocalLinkage())
+      return false;
+  } else {
+    if (GV.hasLocalLinkage())
+      return false;
+
+    if (isExternallyVisible(GV, ExternalNames))
+      return false;
+  }
+
+  GV.setVisibility(GlobalValue::DefaultVisibility);
+  GV.setLinkage(GlobalValue::InternalLinkage);
+  return true;
+}
+
+// If GV is part of a comdat and is externally visible, keep track of its
+// comdat so that we don't internalize any of its members.
+void InternalizePass::checkComdatVisibility(
+    GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+  Comdat *C = GV.getComdat();
+  if (!C)
+    return;
+
+  if (isExternallyVisible(GV, ExternalNames))
+    ExternalComdats.insert(C);
+}
+
+bool InternalizePass::runOnModule(Module &M) {
+  CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
+  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
+
+  SmallPtrSet<GlobalValue *, 8> Used;
+  collectUsedGlobalVariables(M, Used, false);
+
+  // Collect comdat visibility information for the module.
+  std::set<const Comdat *> ExternalComdats;
+  if (!M.getComdatSymbolTable().empty()) {
+    for (Function &F : M)
+      checkComdatVisibility(F, ExternalComdats);
+    for (GlobalVariable &GV : M.globals())
+      checkComdatVisibility(GV, ExternalComdats);
+    for (GlobalAlias &GA : M.aliases())
+      checkComdatVisibility(GA, ExternalComdats);
+  }
+
+  // We must assume that globals in llvm.used have a reference that not even
+  // the linker can see, so we don't internalize them.
+  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
+  // linker can drop those symbols. If this pass is running as part of LTO,
+  // one might think that it could just drop llvm.compiler.used. The problem
+  // is that even in LTO llvm doesn't see every reference. For example,
+  // we don't see references from function local inline assembly. To be
+  // conservative, we internalize symbols in llvm.compiler.used, but we
+  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
+  for (GlobalValue *V : Used) {
+    ExternalNames.insert(V->getName());
+  }
+
+  // Mark all functions not in the API as internal.
+  for (Function &I : M) {
+    if (!maybeInternalize(I, ExternalComdats))
+      continue;
+
+    if (ExternalNode)
+      // Remove a callgraph edge from the external node to this function.
+      ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
+
+    ++NumFunctions;
+    DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
+  }
+
+  // Never internalize the llvm.used symbol. It is used to implement
+  // attribute((used)).
+  // FIXME: Shouldn't this just filter on llvm.metadata section??
+  ExternalNames.insert("llvm.used");
+  ExternalNames.insert("llvm.compiler.used");
+
+  // Never internalize anchors used by the machine module info, else the info
+  // won't find them. (see MachineModuleInfo.)
+  ExternalNames.insert("llvm.global_ctors");
+  ExternalNames.insert("llvm.global_dtors");
+  ExternalNames.insert("llvm.global.annotations");
+
+  // Never internalize symbols code-gen inserts.
+  // FIXME: We should probably add this (and the __stack_chk_guard) via some
+  // type of call-back in CodeGen.
+  ExternalNames.insert("__stack_chk_fail");
+  ExternalNames.insert("__stack_chk_guard");
+
+  // Mark all global variables with initializers that are not in the API as
+  // internal as well.
+  for (GlobalVariable &GV : M.globals()) {
+    if (!maybeInternalize(GV, ExternalComdats))
+      continue;
+
+    ++NumGlobals;
+    DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
+  }
+
+  // Mark all aliases that are not in the API as internal as well.
+  for (GlobalAlias &GA : M.aliases()) {
+    if (!maybeInternalize(GA, ExternalComdats))
+      continue;
+
+    ++NumAliases;
+    DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
+  }
+
+  // We do not keep track of whether this pass changed the module because
+  // it adds unnecessary complexity:
+  // 1) This pass will generally be near the start of the pass pipeline, so
+  //    there will be no analyses to invalidate.
+  // 2) This pass will most likely end up changing the module and it isn't
+  //    worth worrying about optimizing the case where the module is
+  //    unchanged.
+  return true;
+}
+
+ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
+
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+  return new InternalizePass(ExportList);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
new file mode 100644
index 0000000..8e4ad64
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -0,0 +1,310 @@
+//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass wrapper around the ExtractLoop() scalar transformation to extract
+// each top-level loop into its own new function. If the loop is the ONLY loop
+// in a given function, it is not touched. This is a pass most useful for
+// debugging via bugpoint.
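+//
+// Illustrative invocations (the pass names are the INITIALIZE_PASS strings
+// below; the exact opt command lines are an assumption):
+//
+//   opt -loop-extract in.bc -o out.bc        ; extract every top-level loop
+//   opt -loop-extract-single in.bc -o out.bc ; extract at most one loop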
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include <fstream> +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "loop-extract" + +STATISTIC(NumExtracted, "Number of loops extracted"); + +namespace { + struct LoopExtractor : public LoopPass { + static char ID; // Pass identification, replacement for typeid + unsigned NumLoops; + + explicit LoopExtractor(unsigned numLoops = ~0) + : LoopPass(ID), NumLoops(numLoops) { + initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + } + }; +} + +char LoopExtractor::ID = 0; +INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) +INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false) + +namespace { + /// SingleLoopExtractor - For bugpoint. + struct SingleLoopExtractor : public LoopExtractor { + static char ID; // Pass identification, replacement for typeid + SingleLoopExtractor() : LoopExtractor(1) {} + }; +} // End anonymous namespace + +char SingleLoopExtractor::ID = 0; +INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", + "Extract at most one loop into a new function", false, false) + +// createLoopExtractorPass - This pass extracts all natural loops from the +// program into a function if it can. +// +Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } + +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { + if (skipOptnoneFunction(L)) + return false; + + // Only visit top-level loops. + if (L->getParentLoop()) + return false; + + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) + return false; + + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + bool Changed = false; + + // If there is more than one top-level loop in this function, extract all of + // the loops. Otherwise there is exactly one top-level loop; in this case if + // this function is more than a minimal wrapper around the loop, extract + // the loop. + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = + L->getHeader()->getParent()->getEntryBlock().getTerminator(); + if (!isa<BranchInst>(EntryTI) || + !cast<BranchInst>(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != L->getHeader()) { + ShouldExtractLoop = true; + } else { + // Check to see if any exits from the loop are more than just return + // blocks. 
+    SmallVector<BasicBlock*, 8> ExitBlocks;
+    L->getExitBlocks(ExitBlocks);
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+      if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
+        ShouldExtractLoop = true;
+        break;
+      }
+  }
+
+  if (ShouldExtractLoop) {
+    // We must omit EH pads: an EH pad must accompany its invoke instruction,
+    // so extracting one here would create a loop in the extracted function,
+    // and an infinite cycle occurs when the extractor tries to extract that
+    // loop as well.
+    SmallVector<BasicBlock*, 8> ExitBlocks;
+    L->getExitBlocks(ExitBlocks);
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+      if (ExitBlocks[i]->isEHPad()) {
+        ShouldExtractLoop = false;
+        break;
+      }
+  }
+
+  if (ShouldExtractLoop) {
+    if (NumLoops == 0) return Changed;
+    --NumLoops;
+    CodeExtractor Extractor(DT, *L);
+    if (Extractor.extractCodeRegion() != nullptr) {
+      Changed = true;
+      // After extraction, the loop is replaced by a function call, so
+      // we shouldn't try to run any more loop passes on it.
+      LI.updateUnloop(L);
+      // Count only the loops we actually extracted.
+      ++NumExtracted;
+    }
+  }
+
+  return Changed;
+}
+
+// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+// program into a function if it can. This is used by bugpoint.
+//
+Pass *llvm::createSingleLoopExtractorPass() {
+  return new SingleLoopExtractor();
+}
+
+
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+          cl::desc("A file containing list of basic blocks to not extract"),
+          cl::Hidden);
+
+namespace {
+  /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
+  /// from the module into their own functions except for those specified by
+  /// the BlocksToNotExtract list.
+  class BlockExtractorPass : public ModulePass {
+    void LoadFile(const char *Filename);
+    void SplitLandingPadPreds(Function *F);
+
+    std::vector<BasicBlock*> BlocksToNotExtract;
+    std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    BlockExtractorPass() : ModulePass(ID) {
+      if (!BlockFile.empty())
+        LoadFile(BlockFile.c_str());
+    }
+
+    bool runOnModule(Module &M) override;
+  };
+}
+
+char BlockExtractorPass::ID = 0;
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+                "Extract Basic Blocks From Module (for bugpoint use)",
+                false, false)
+
+// createBlockExtractorPass - This pass extracts all blocks (except those
+// specified in the argument list) from the functions in the module.
+//
+ModulePass *llvm::createBlockExtractorPass() {
+  return new BlockExtractorPass();
+}
+
+void BlockExtractorPass::LoadFile(const char *Filename) {
+  // Load the BlockFile...
+  std::ifstream In(Filename);
+  if (!In.good()) {
+    errs() << "WARNING: BlockExtractor couldn't load file '" << Filename
+           << "'!\n";
+    return;
+  }
+  while (In) {
+    std::string FunctionName, BlockName;
+    In >> FunctionName;
+    In >> BlockName;
+    if (!BlockName.empty())
+      BlocksToNotExtractByName.push_back(
+          std::make_pair(FunctionName, BlockName));
+  }
+}
+
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the
+/// invoke instruction. The critical edge breaker will refuse to break
+/// critical edges to a landing pad, so we split them here. After this method
+/// runs, all landing pads should have only one predecessor.
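+///
+/// Illustrative sketch (hypothetical block names): if invokes in %bb1 and
+/// %bb2 both unwind to %lpad, the pad is split into %lpad.1 and %lpad.2
+/// (the ".1"/".2" suffixes passed to SplitLandingPadPredecessors below),
+/// leaving each resulting landing pad with a single predecessor.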
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator());
+    if (!II) continue;
+    BasicBlock *Parent = II->getParent();
+    BasicBlock *LPad = II->getUnwindDest();
+
+    // Look through the landing pad's predecessors. If one of them ends in an
+    // 'invoke', then we want to split the landing pad.
+    bool Split = false;
+    for (pred_iterator
+           PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) {
+      BasicBlock *BB = *PI;
+      if (BB->isLandingPad() && BB != Parent &&
+          isa<InvokeInst>(Parent->getTerminator())) {
+        Split = true;
+        break;
+      }
+    }
+
+    if (!Split) continue;
+
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
+  }
+}
+
+bool BlockExtractorPass::runOnModule(Module &M) {
+  std::set<BasicBlock*> TranslatedBlocksToNotExtract;
+  for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
+    BasicBlock *BB = BlocksToNotExtract[i];
+    Function *F = BB->getParent();
+
+    // Find the corresponding function in this module.
+    Function *MF = M.getFunction(F->getName());
+    assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?");
+
+    // Figure out which index the basic block is in its function.
+    Function::iterator BBI = MF->begin();
+    std::advance(BBI, std::distance(F->begin(), Function::iterator(BB)));
+    TranslatedBlocksToNotExtract.insert(&*BBI);
+  }
+
+  while (!BlocksToNotExtractByName.empty()) {
+    // There's no way to find BBs by name without looking at every BB inside
+    // every Function. Fortunately, this list is always empty except when used
+    // by bugpoint, in which case correctness is more important than
+    // performance.
+
+    std::string &FuncName  = BlocksToNotExtractByName.back().first;
+    std::string &BlockName = BlocksToNotExtractByName.back().second;
+
+    for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+      Function &F = *FI;
+      if (F.getName() != FuncName) continue;
+
+      for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+        BasicBlock &BB = *BI;
+        if (BB.getName() != BlockName) continue;
+
+        TranslatedBlocksToNotExtract.insert(&*BI);
+      }
+    }
+
+    BlocksToNotExtractByName.pop_back();
+  }
+
+  // Now that we know which blocks to not extract, figure out which ones we
+  // WANT to extract.
+  std::vector<BasicBlock*> BlocksToExtract;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    SplitLandingPadPreds(&*F);
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      if (!TranslatedBlocksToNotExtract.count(&*BB))
+        BlocksToExtract.push_back(&*BB);
+  }
+
+  for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+    SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+    BlocksToExtractVec.push_back(BlocksToExtract[i]);
+    if (const InvokeInst *II =
+            dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+      BlocksToExtractVec.push_back(II->getUnwindDest());
+    CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+  }
+
+  return !BlocksToExtract.empty();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..7b51574
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,1055 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic. +// See http://llvm.org/docs/LangRef.html#bitsets for more information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/LowerBitSets.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "lowerbitsets" + +STATISTIC(ByteArraySizeBits, "Byte array size in bits"); +STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); +STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); +STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered"); +STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets"); + +static cl::opt<bool> AvoidReuse( + "lowerbitsets-avoid-reuse", + cl::desc("Try to avoid reuse of byte array addresses using aliases"), + cl::Hidden, cl::init(true)); + +bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { + if (Offset < ByteOffset) + return false; + + if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0) + return false; + + uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2; + if (BitOffset >= BitSize) + return false; + + return Bits.count(BitOffset); +} + +bool BitSetInfo::containsValue( + const DataLayout &DL, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V, + uint64_t COffset) const { + if (auto GV = dyn_cast<GlobalObject>(V)) { + auto I = GlobalLayout.find(GV); + if (I == GlobalLayout.end()) + return false; + return containsGlobalOffset(I->second + COffset); + } + + if (auto GEP = dyn_cast<GEPOperator>(V)) { + APInt APOffset(DL.getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(DL, APOffset); + if (!Result) + return false; + COffset += APOffset.getZExtValue(); + return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), + COffset); + } + + if (auto Op = dyn_cast<Operator>(V)) { + if (Op->getOpcode() == Instruction::BitCast) + return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset); + + if (Op->getOpcode() == Instruction::Select) + return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) && + containsValue(DL, GlobalLayout, Op->getOperand(2), COffset); + } + + return false; +} + +void BitSetInfo::print(raw_ostream &OS) const { + OS << "offset " << ByteOffset << " size " << BitSize << " align " + << (1 << AlignLog2); + + if (isAllOnes()) { + OS << " all-ones\n"; + return; + } + + OS << " { "; + for (uint64_t B : Bits) + OS << B << ' '; + OS << "}\n"; +} + +BitSetInfo BitSetBuilder::build() { + if (Min > Max) + Min = 0; + + // Normalize each offset against the minimum observed offset, and compute + // the bitwise OR of each of the offsets. 
The number of trailing zeros + // in the mask gives us the log2 of the alignment of all offsets, which + // allows us to compress the bitset by only storing one bit per aligned + // address. + uint64_t Mask = 0; + for (uint64_t &Offset : Offsets) { + Offset -= Min; + Mask |= Offset; + } + + BitSetInfo BSI; + BSI.ByteOffset = Min; + + BSI.AlignLog2 = 0; + if (Mask != 0) + BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); + + // Build the compressed bitset while normalizing the offsets against the + // computed alignment. + BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; + for (uint64_t Offset : Offsets) { + Offset >>= BSI.AlignLog2; + BSI.Bits.insert(Offset); + } + + return BSI; +} + +void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { + // Create a new fragment to hold the layout for F. + Fragments.emplace_back(); + std::vector<uint64_t> &Fragment = Fragments.back(); + uint64_t FragmentIndex = Fragments.size() - 1; + + for (auto ObjIndex : F) { + uint64_t OldFragmentIndex = FragmentMap[ObjIndex]; + if (OldFragmentIndex == 0) { + // We haven't seen this object index before, so just add it to the current + // fragment. + Fragment.push_back(ObjIndex); + } else { + // This index belongs to an existing fragment. Copy the elements of the + // old fragment into this one and clear the old fragment. We don't update + // the fragment map just yet, this ensures that any further references to + // indices from the old fragment in this fragment do not insert any more + // indices. + std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex]; + Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end()); + OldFragment.clear(); + } + } + + // Update the fragment map to point our object indices to this fragment. + for (uint64_t ObjIndex : Fragment) + FragmentMap[ObjIndex] = FragmentIndex; +} + +void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, + uint64_t BitSize, uint64_t &AllocByteOffset, + uint8_t &AllocMask) { + // Find the smallest current allocation. + unsigned Bit = 0; + for (unsigned I = 1; I != BitsPerByte; ++I) + if (BitAllocs[I] < BitAllocs[Bit]) + Bit = I; + + AllocByteOffset = BitAllocs[Bit]; + + // Add our size to it. + unsigned ReqSize = AllocByteOffset + BitSize; + BitAllocs[Bit] = ReqSize; + if (Bytes.size() < ReqSize) + Bytes.resize(ReqSize); + + // Set our bits. + AllocMask = 1 << Bit; + for (uint64_t B : Bits) + Bytes[AllocByteOffset + B] |= AllocMask; +} + +namespace { + +struct ByteArrayInfo { + std::set<uint64_t> Bits; + uint64_t BitSize; + GlobalVariable *ByteArray; + Constant *Mask; +}; + +struct LowerBitSets : public ModulePass { + static char ID; + LowerBitSets() : ModulePass(ID) { + initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + + bool LinkerSubsectionsViaSymbols; + Triple::ArchType Arch; + Triple::ObjectFormatType ObjectFormat; + IntegerType *Int1Ty; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + Type *Int32PtrTy; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + + // The llvm.bitsets named metadata. + NamedMDNode *BitSetNM; + + // Mapping from bitset identifiers to the call sites that test them. 
+  DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
+
+  std::vector<ByteArrayInfo> ByteArrayInfos;
+
+  BitSetInfo
+  buildBitSet(Metadata *BitSet,
+              const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+  void allocateByteArrays();
+  Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+                          Value *BitOffset);
+  void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
+                        Constant *CombinedGlobalAddr,
+                        const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  Value *
+  lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+                  Constant *CombinedGlobal,
+                  const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+  void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+                                       ArrayRef<GlobalVariable *> Globals);
+  unsigned getJumpTableEntrySize();
+  Type *getJumpTableEntryType();
+  Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
+                                 unsigned Distance);
+  void verifyBitSetMDNode(MDNode *Op);
+  void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+                                 ArrayRef<Function *> Functions);
+  void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+                                   ArrayRef<GlobalObject *> Globals);
+  bool buildBitSets();
+  bool eraseBitSetMetadata();
+
+  bool doInitialization(Module &M) override;
+  bool runOnModule(Module &M) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+                      "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+                    "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &Mod) {
+  M = &Mod;
+  const DataLayout &DL = Mod.getDataLayout();
+
+  Triple TargetTriple(M->getTargetTriple());
+  LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+  Arch = TargetTriple.getArch();
+  ObjectFormat = TargetTriple.getObjectFormat();
+
+  Int1Ty = Type::getInt1Ty(M->getContext());
+  Int8Ty = Type::getInt8Ty(M->getContext());
+  Int32Ty = Type::getInt32Ty(M->getContext());
+  Int32PtrTy = PointerType::getUnqual(Int32Ty);
+  Int64Ty = Type::getInt64Ty(M->getContext());
+  IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+
+  BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+  BitSetTestCallSites.clear();
+
+  return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+    Metadata *BitSet,
+    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+  BitSetBuilder BSB;
+
+  // Compute the byte offset of each element of this bitset.
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+      Constant *OpConst =
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+      if (auto GA = dyn_cast<GlobalAlias>(OpConst))
+        OpConst = GA->getAliasee();
+      auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
+      if (!OpGlobal)
+        continue;
+      uint64_t Offset =
+          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+                                ->getValue())->getZExtValue();
+
+      Offset += GlobalLayout.find(OpGlobal)->second;
+
+      BSB.addOffset(Offset);
+    }
+  }
+
+  return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern lowers to the bt instruction on x86.
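+///
+/// Illustrative example (not from the source): with a 32-bit Bits value and
+/// BitOffset = 37, the emitted IR computes BitIndex = 37 & 31 = 5 and tests
+/// (Bits & (1 << 5)) != 0, i.e. the masked bit test built below.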
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, + Value *BitOffset) { + auto BitsType = cast<IntegerType>(Bits->getType()); + unsigned BitWidth = BitsType->getBitWidth(); + + BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType); + Value *BitIndex = + B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1)); + Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex); + Value *MaskedBits = B.CreateAnd(Bits, BitMask); + return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); +} + +ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { + // Create globals to stand in for byte arrays and masks. These never actually + // get initialized, we RAUW and erase them later in allocateByteArrays() once + // we know the offset and mask to use. + auto ByteArrayGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + auto MaskGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + + ByteArrayInfos.emplace_back(); + ByteArrayInfo *BAI = &ByteArrayInfos.back(); + + BAI->Bits = BSI.Bits; + BAI->BitSize = BSI.BitSize; + BAI->ByteArray = ByteArrayGlobal; + BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty); + return BAI; +} + +void LowerBitSets::allocateByteArrays() { + std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(), + [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { + return BAI1.BitSize > BAI2.BitSize; + }); + + std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); + + ByteArrayBuilder BAB; + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + uint8_t Mask; + BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); + + BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask)); + cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent(); + } + + Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes); + auto ByteArray = + new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, ByteArrayConst); + + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( + ByteArrayConst->getType(), ByteArray, Idxs); + + // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures + // that the pc-relative displacement is folded into the lea instead of the + // test instruction getting another displacement. + if (LinkerSubsectionsViaSymbols) { + BAI->ByteArray->replaceAllUsesWith(GEP); + } else { + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); + BAI->ByteArray->replaceAllUsesWith(Alias); + } + BAI->ByteArray->eraseFromParent(); + } + + ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + + BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + + BAB.BitAllocs[6] + BAB.BitAllocs[7]; + ByteArraySizeBytes = BAB.Bytes.size(); +} + +/// Build a test that bit BitOffset is set in BSI, where +/// BitSetGlobal is a global containing the bits in BSI. +Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, + ByteArrayInfo *&BAI, Value *BitOffset) { + if (BSI.BitSize <= 64) { + // If the bit set is sufficiently small, we can avoid a load by bit testing + // a constant. 
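+    // For instance (hypothetical set), a bitset with BitSize 3 containing
+    // bits {0, 2} becomes the constant 0b101, and the test reduces to a
+    // masked bit test against that immediate.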
+ IntegerType *BitsTy; + if (BSI.BitSize <= 32) + BitsTy = Int32Ty; + else + BitsTy = Int64Ty; + + uint64_t Bits = 0; + for (auto Bit : BSI.Bits) + Bits |= uint64_t(1) << Bit; + Constant *BitsConst = ConstantInt::get(BitsTy, Bits); + return createMaskedBitTest(B, BitsConst, BitOffset); + } else { + if (!BAI) { + ++NumByteArraysCreated; + BAI = createByteArray(BSI); + } + + Constant *ByteArray = BAI->ByteArray; + Type *Ty = BAI->ByteArray->getValueType(); + if (!LinkerSubsectionsViaSymbols && AvoidReuse) { + // Each use of the byte array uses a different alias. This makes the + // backend less likely to reuse previously computed byte array addresses, + // improving the security of the CFI mechanism based on this pass. + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, + GlobalValue::PrivateLinkage, "bits_use", + ByteArray, M); + } + + Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset); + Value *Byte = B.CreateLoad(ByteAddr); + + Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); + return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); + } +} + +/// Lower a llvm.bitset.test call to its implementation. Returns the value to +/// replace the call with. +Value *LowerBitSets::lowerBitSetCall( + CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, + Constant *CombinedGlobalIntAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { + Value *Ptr = CI->getArgOperand(0); + const DataLayout &DL = M->getDataLayout(); + + if (BSI.containsValue(DL, GlobalLayout, Ptr)) + return ConstantInt::getTrue(M->getContext()); + + Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( + CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); + + BasicBlock *InitialBB = CI->getParent(); + + IRBuilder<> B(CI); + + Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); + + if (BSI.isSingleOffset()) + return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); + + Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); + + Value *BitOffset; + if (BSI.AlignLog2 == 0) { + BitOffset = PtrOffset; + } else { + // We need to check that the offset both falls within our range and is + // suitably aligned. We can check both properties at the same time by + // performing a right rotate by log2(alignment) followed by an integer + // comparison against the bitset size. The rotate will move the lower + // order bits that need to be zero into the higher order bits of the + // result, causing the comparison to fail if they are nonzero. The rotate + // also conveniently gives us a bit offset to use during the load from + // the bitset. + Value *OffsetSHR = + B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2)); + Value *OffsetSHL = B.CreateShl( + PtrOffset, + ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2)); + BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); + } + + Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize); + Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst); + + // If the bit set is all ones, testing against it is unnecessary. + if (BSI.isAllOnes()) + return OffsetInRange; + + TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); + IRBuilder<> ThenB(Term); + + // Now that we know that the offset is in range and aligned, load the + // appropriate bit from the bitset. 
+  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+
+  // The value we want is 0 if we came directly from the initial block
+  // (having failed the range or alignment checks), or the loaded bit if
+  // we came from the block in which we loaded it.
+  B.SetInsertPoint(CI);
+  PHINode *P = B.CreatePHI(Int1Ty, 2);
+  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+  P->addIncoming(Bit, ThenB.GetInsertBlock());
+  return P;
+}
+
+/// Given a disjoint set of bitsets and globals, lay out the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobalVariables(
+    ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
+  // Build a new global with the combined contents of the referenced globals.
+  // This global is a struct whose even-indexed elements contain the original
+  // contents of the referenced globals and whose odd-indexed elements contain
+  // any padding required to align the next element to the next power of 2.
+  std::vector<Constant *> GlobalInits;
+  const DataLayout &DL = M->getDataLayout();
+  for (GlobalVariable *G : Globals) {
+    GlobalInits.push_back(G->getInitializer());
+    uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
+
+    // Compute the amount of padding required.
+    uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+    // A cap of 128 was found experimentally to give a good data/instruction
+    // overhead tradeoff.
+    if (Padding > 128)
+      Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+    GlobalInits.push_back(
+        ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+  }
+  if (!GlobalInits.empty())
+    GlobalInits.pop_back();
+  Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+  auto *CombinedGlobal =
+      new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+                         GlobalValue::PrivateLinkage, NewInit);
+
+  StructType *NewTy = cast<StructType>(NewInit->getType());
+  const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
+
+  // Compute the offsets of the original globals within the new global.
+  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+  for (unsigned I = 0; I != Globals.size(); ++I)
+    // Multiply by 2 to account for padding elements.
+    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+  lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
+
+  // Build aliases pointing to offsets into the combined global for each
+  // global from which we built the combined global, and replace references
+  // to the original globals with references to the aliases.
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    // Multiply by 2 to account for padding elements.
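+    // (Illustration: the combined struct is {G0, pad0, G1, pad1, ...}, so
+    // Globals[1] lives at struct element 2 and the GEP indices are {0, 2}.)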
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, I * 2)}; + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); + if (LinkerSubsectionsViaSymbols) { + Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Globals[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, + Globals[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Globals[I]->getVisibility()); + GAlias->takeName(Globals[I]); + Globals[I]->replaceAllUsesWith(GAlias); + } + Globals[I]->eraseFromParent(); + } +} + +void LowerBitSets::lowerBitSetCalls( + ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { + Constant *CombinedGlobalIntAddr = + ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy); + + // For each bitset in this disjoint set... + for (Metadata *BS : BitSets) { + // Build the bitset. + BitSetInfo BSI = buildBitSet(BS, GlobalLayout); + DEBUG({ + if (auto BSS = dyn_cast<MDString>(BS)) + dbgs() << BSS->getString() << ": "; + else + dbgs() << "<unnamed>: "; + BSI.print(dbgs()); + }); + + ByteArrayInfo *BAI = nullptr; + + // Lower each call to llvm.bitset.test for this bitset. + for (CallInst *CI : BitSetTestCallSites[BS]) { + ++NumBitSetCallsLowered; + Value *Lowered = + lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout); + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } + } +} + +void LowerBitSets::verifyBitSetMDNode(MDNode *Op) { + if (Op->getNumOperands() != 3) + report_fatal_error( + "All operands of llvm.bitsets metadata must have 3 elements"); + if (!Op->getOperand(1)) + return; + + auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1)); + if (!OpConstMD) + report_fatal_error("Bit set element must be a constant"); + auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue()); + if (!OpGlobal) + return; + + if (OpGlobal->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (OpGlobal->hasSection()) + report_fatal_error("Bit set element may not have an explicit section"); + + if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker()) + report_fatal_error("Bit set global var element must be a definition"); + + auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2)); + if (!OffsetConstMD) + report_fatal_error("Bit set element offset must be a constant"); + auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Bit set element offset must be an integer constant"); +} + +static const unsigned kX86JumpTableEntrySize = 8; + +unsigned LowerBitSets::getJumpTableEntrySize() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return kX86JumpTableEntrySize; +} + +// Create a constant representing a jump table entry for the target. This +// consists of an instruction sequence containing a relative branch to Dest. The +// constant will be laid out at address Src+(Len*Distance) where Len is the +// target-specific jump table entry size. 
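+// For the x86 entry emitted below, the layout is 8 bytes: a 5-byte
+// "jmp rel32" followed by three int3 padding bytes, so (illustratively) the
+// displacement stored for entry Distance is Dest - (Src + Distance*8 + 5),
+// the +5 accounting for the jmp instruction's own length.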
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance) { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + const unsigned kJmpPCRel32Code = 0xe9; + const unsigned kInt3Code = 0xcc; + + ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code); + + // Build a constant representing the displacement between the constant's + // address and Dest. This will resolve to a PC32 relocation referring to Dest. + Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy); + Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy); + Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt); + ConstantInt *DispOffset = + ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5); + Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset); + OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty); + + ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code); + + Constant *Fields[] = { + Jmp, OffsetedDisp, Int3, Int3, Int3, + }; + return ConstantStruct::getAnon(Fields, /*Packed=*/true); +} + +Type *LowerBitSets::getJumpTableEntryType() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return StructType::get(M->getContext(), + {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty}, + /*Packed=*/true); +} + +/// Given a disjoint set of bitsets and functions, build a jump table for the +/// functions, build the bit sets and lower the llvm.bitset.test calls. +void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, + ArrayRef<Function *> Functions) { + // Unlike the global bitset builder, the function bitset builder cannot + // re-arrange functions in a particular order and base its calculations on the + // layout of the functions' entry points, as we have no idea how large a + // particular function will end up being (the size could even depend on what + // this pass does!) Instead, we build a jump table, which is a block of code + // consisting of one branch instruction for each of the functions in the bit + // set that branches to the target function, and redirect any taken function + // addresses to the corresponding jump table entry. In the object file's + // symbol table, the symbols for the target functions also refer to the jump + // table entries, so that addresses taken outside the module will pass any + // verification done inside the module. + // + // In more concrete terms, suppose we have three functions f, g, h which are + // members of a single bitset, and a function foo that returns their + // addresses: + // + // f: + // mov 0, %eax + // ret + // + // g: + // mov 1, %eax + // ret + // + // h: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // To create a jump table for these functions, we instruct the LLVM code + // generator to output a jump table in the .text section. This is done by + // representing the instructions in the jump table as an LLVM constant and + // placing them in a global variable in the .text section. 
The end result will + // (conceptually) look like this: + // + // f: + // jmp .Ltmp0 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // g: + // jmp .Ltmp1 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // h: + // jmp .Ltmp2 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // .Ltmp0: + // mov 0, %eax + // ret + // + // .Ltmp1: + // mov 1, %eax + // ret + // + // .Ltmp2: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // Because the addresses of f, g, h are evenly spaced at a power of 2, in the + // normal case the check can be carried out using the same kind of simple + // arithmetic that we normally use for globals. + + assert(!Functions.empty()); + + // Build a simple layout based on the regular layout of jump tables. + DenseMap<GlobalObject *, uint64_t> GlobalLayout; + unsigned EntrySize = getJumpTableEntrySize(); + for (unsigned I = 0; I != Functions.size(); ++I) + GlobalLayout[Functions[I]] = I * EntrySize; + + // Create a constant to hold the jump table. + ArrayType *JumpTableType = + ArrayType::get(getJumpTableEntryType(), Functions.size()); + auto JumpTable = new GlobalVariable(*M, JumpTableType, + /*isConstant=*/true, + GlobalValue::PrivateLinkage, nullptr); + JumpTable->setSection(ObjectFormat == Triple::MachO + ? "__TEXT,__text,regular,pure_instructions" + : ".text"); + lowerBitSetCalls(BitSets, JumpTable, GlobalLayout); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. + for (unsigned I = 0; I != Functions.size(); ++I) { + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getGetElementPtr( + JumpTableType, JumpTable, + ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + Functions[I]->getType()); + if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) { + Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Functions[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0, + Functions[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Functions[I]->getVisibility()); + GAlias->takeName(Functions[I]); + Functions[I]->replaceAllUsesWith(GAlias); + } + if (!Functions[I]->isDeclarationForLinker()) + Functions[I]->setLinkage(GlobalValue::PrivateLinkage); + } + + // Build and set the jump table's initializer. + std::vector<Constant *> JumpTableEntries; + for (unsigned I = 0; I != Functions.size(); ++I) + JumpTableEntries.push_back( + createJumpTableEntry(JumpTable, Functions[I], I)); + JumpTable->setInitializer( + ConstantArray::get(JumpTableType, JumpTableEntries)); +} + +void LowerBitSets::buildBitSetsFromDisjointSet( + ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) { + llvm::DenseMap<Metadata *, uint64_t> BitSetIndices; + llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices; + for (unsigned I = 0; I != BitSets.size(); ++I) + BitSetIndices[BitSets[I]] = I; + for (unsigned I = 0; I != Globals.size(); ++I) + GlobalIndices[Globals[I]] = I; + + // For each bitset, build a set of indices that refer to globals referenced by + // the bitset. 
+  std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      // Op = { bitset name, global, offset }
+      if (!Op->getOperand(1))
+        continue;
+      auto I = BitSetIndices.find(Op->getOperand(0));
+      if (I == BitSetIndices.end())
+        continue;
+
+      auto OpGlobal = dyn_cast<GlobalObject>(
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+      if (!OpGlobal)
+        continue;
+      BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+    }
+  }
+
+  // Order the sets of indices by size. The GlobalLayoutBuilder works best
+  // when given small index sets first.
+  std::stable_sort(
+      BitSetMembers.begin(), BitSetMembers.end(),
+      [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+        return O1.size() < O2.size();
+      });
+
+  // Create a GlobalLayoutBuilder and provide it with index sets as layout
+  // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+  // as close together as possible.
+  GlobalLayoutBuilder GLB(Globals.size());
+  for (auto &&MemSet : BitSetMembers)
+    GLB.addFragment(MemSet);
+
+  // Build the bitsets from this disjoint set.
+  if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+    // Build a vector of global variables with the computed layout.
+    std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+    auto OGI = OrderedGVs.begin();
+    for (auto &&F : GLB.Fragments) {
+      for (auto &&Offset : F) {
+        auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+        if (!GV)
+          report_fatal_error(
+              "Bit set may not contain both global variables and functions");
+        *OGI++ = GV;
+      }
+    }
+
+    buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+  } else {
+    // Build a vector of functions with the computed layout.
+    std::vector<Function *> OrderedFns(Globals.size());
+    auto OFI = OrderedFns.begin();
+    for (auto &&F : GLB.Fragments) {
+      for (auto &&Offset : F) {
+        auto Fn = dyn_cast<Function>(Globals[Offset]);
+        if (!Fn)
+          report_fatal_error(
+              "Bit set may not contain both global variables and functions");
+        *OFI++ = Fn;
+      }
+    }
+
+    buildBitSetsFromFunctions(BitSets, OrderedFns);
+  }
+}
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets() {
+  Function *BitSetTestFunc =
+      M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+  if (!BitSetTestFunc)
+    return false;
+
+  // Equivalence class set containing bitsets and the globals they reference.
+  // This is used to partition the set of bitsets in the module into disjoint
+  // sets.
+  typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
+      GlobalClassesTy;
+  GlobalClassesTy GlobalClasses;
+
+  // Verify the bitset metadata and build a mapping from bitset identifiers to
+  // their last observed index in BitSetNM. This will be used later to
+  // deterministically order the list of bitset identifiers.
+  llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
+  if (BitSetNM) {
+    for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
+      MDNode *Op = BitSetNM->getOperand(I);
+      verifyBitSetMDNode(Op);
+      BitSetIdIndices[Op->getOperand(0)] = I;
+    }
+  }
+
+  for (const Use &U : BitSetTestFunc->uses()) {
+    auto CI = cast<CallInst>(U.getUser());
+
+    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!BitSetMDVal)
+      report_fatal_error(
+          "Second argument of llvm.bitset.test must be metadata");
+    auto BitSet = BitSetMDVal->getMetadata();
+
+    // Add the call site to the list of call sites for this bit set. We also
+    // use BitSetTestCallSites to keep track of whether we have seen this bit
+    // set before.
If we have, we don't need to re-add the referenced globals to the + // equivalence class. + std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, + bool> Ins = + BitSetTestCallSites.insert( + std::make_pair(BitSet, std::vector<CallInst *>())); + Ins.first->second.push_back(CI); + if (!Ins.second) + continue; + + // Add the bitset to the equivalence class. + GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet); + GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI); + + if (!BitSetNM) + continue; + + // Add the referenced globals to the bitset's equivalence class. + for (MDNode *Op : BitSetNM->operands()) { + if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) + continue; + + auto OpGlobal = dyn_cast<GlobalObject>( + cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); + if (!OpGlobal) + continue; + + CurSet = GlobalClasses.unionSets( + CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); + } + } + + if (GlobalClasses.empty()) + return false; + + // Build a list of disjoint sets ordered by their maximum BitSetNM index + // for determinism. + std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets; + for (GlobalClassesTy::iterator I = GlobalClasses.begin(), + E = GlobalClasses.end(); + I != E; ++I) { + if (!I->isLeader()) continue; + ++NumBitSetDisjointSets; + + unsigned MaxIndex = 0; + for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is<Metadata *>()) + MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]); + } + Sets.emplace_back(I, MaxIndex); + } + std::sort(Sets.begin(), Sets.end(), + [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1, + const std::pair<GlobalClassesTy::iterator, unsigned> &S2) { + return S1.second < S2.second; + }); + + // For each disjoint set we found... + for (const auto &S : Sets) { + // Build the list of bitsets in this disjoint set. + std::vector<Metadata *> BitSets; + std::vector<GlobalObject *> Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is<Metadata *>()) + BitSets.push_back(MI->get<Metadata *>()); + else + Globals.push_back(MI->get<GlobalObject *>()); + } + + // Order bitsets by BitSetNM index for determinism. This ordering is stable + // as there is a one-to-one mapping between metadata and indices. + std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) { + return BitSetIdIndices[M1] < BitSetIdIndices[M2]; + }); + + // Lower the bitsets in this disjoint set. + buildBitSetsFromDisjointSet(BitSets, Globals); + } + + allocateByteArrays(); + + return true; +} + +bool LowerBitSets::eraseBitSetMetadata() { + if (!BitSetNM) + return false; + + M->eraseNamedMetadata(BitSetNM); + return true; +} + +bool LowerBitSets::runOnModule(Module &M) { + bool Changed = buildBitSets(); + Changed |= eraseBitSetMetadata(); + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp new file mode 100644 index 0000000..8a209a1 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -0,0 +1,1880 @@ +//===- MergeFunctions.cpp - Merge identical functions ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for equivalent functions that are mergeable and folds them.
+//
+// An order relation is defined on the set of functions. It is implemented by
+// a function comparison procedure that returns
+// 0 when the functions are equal,
+// -1 when the left function is less than the right function, and
+// 1 in the opposite case. We need a total ordering, so the relation must
+// satisfy four properties on the function set:
+// a <= a (reflexivity)
+// if a <= b and b <= a then a = b (antisymmetry)
+// if a <= b and b <= c then a <= c (transitivity)
+// for all a and b: a <= b or b <= a (totality)
+//
+// The comparison iterates through each instruction in each basic block.
+// Functions are kept in a binary tree. For each new function F we perform a
+// lookup in that tree.
+// In practice it works the following way:
+// -- We define a Function* container class with a custom "operator<"
+//    (FunctionPtr).
+// -- "FunctionPtr" instances are stored in a std::set collection, so every
+//    std::set::insert operation gives a result in O(log N) time.
+//
+// As an optimization, a hash of the function structure is calculated first,
+// and two functions are only compared if they have the same hash. This hash
+// is cheap to compute, and has the property that if function F == G according
+// to the comparison function, then hash(F) == hash(G). This consistency
+// property is critical to ensuring all possible merging opportunities are
+// exploited. Collisions in the hash affect the speed of the pass but not the
+// correctness or determinism of the resulting transformation.
+//
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
+//
+//===----------------------------------------------------------------------===//
+//
+// Future work:
+//
+// * virtual functions.
+//
+// Many functions have their address taken by the virtual function table for
+// the object they belong to. However, as long as it's only used for a lookup
+// and call, this is irrelevant, and we'd like to fold such functions.
+//
+// * be smarter about bitcasts.
+//
+// In order to fold functions, we will sometimes add either bitcast
+// instructions or bitcast constant expressions. Unfortunately, this can
+// confound further analysis since the two functions differ where one has a
+// bitcast and the other doesn't. We should learn to look through bitcasts.
+//
+// * Compare complex types with pointer types inside.
+// * Compare cross-reference cases.
+// * Compare complex expressions.
+//
+// All three issues above can be described as the ability to prove that
+// fA == fB == fC == fE == fF == fG in the example below:
+//
+// void fA() {
+//   fB();
+// }
+// void fB() {
+//   fA();
+// }
+//
+// void fE() {
+//   fF();
+// }
+// void fF() {
+//   fG();
+// }
+// void fG() {
+//   fE();
+// }
+//
+// The simplest cross-reference case (fA <--> fB) was implemented in previous
+// versions of MergeFunctions, though it appeared in only two function pairs
+// in the test-suite (which contains >50k functions).
+// The ability to detect complex cross-referencing (e.g.: A->B->C->D->A) would
+// cover many more cases.
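+//
+// As a minimal illustrative sketch (hypothetical functions, not from the
+// test-suite): since all pointer types are treated as equivalent, the pass
+// would fold the following pair, redirecting calls to one into a call to a
+// bitcast of the other:
+//
+// int fP(int *p) {
+//   return p != 0;
+// }
+// int fQ(float *q) {
+//   return q != 0;
+// }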
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mergefunc"
+
+STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+static cl::opt<unsigned> NumFunctionsForSanityCheck(
+    "mergefunc-sanity",
+    cl::desc("How many functions in the module could be used for "
+             "the MergeFunctions pass sanity check. "
+             "'0' disables this check. Works only with the '-debug' key."),
+    cl::init(0), cl::Hidden);
+
+namespace {
+
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals.
+/// This state must be preserved throughout the pass, because Functions and
+/// other globals need to maintain their relative order. Globals are assigned
+/// a number when they are first visited. This order is deterministic, and so
+/// the assigned numbers are as well. When two functions are merged, neither
+/// number is updated. If the symbols are weak, this would be incorrect. If
+/// they are strong, then one will replace the other at all of its references,
+/// so direct call sites will see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+  struct Config : ValueMapConfig<GlobalValue*> {
+    enum { FollowRAUW = false };
+  };
+  // Each GlobalValue is mapped to an identifier. The Config ensures that when
+  // RAUW occurs, the mapping does not change. Tracking changes is
+  // unnecessary, and also problematic for weak symbols (which may be
+  // overwritten).
+  typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+  ValueNumberMap GlobalNumbers;
+  // The next unused serial number to assign to a global.
+  uint64_t NextNumber;
+  public:
+  GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+  uint64_t getNumber(GlobalValue* Global) {
+    ValueNumberMap::iterator MapIter;
+    bool Inserted;
+    std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+    if (Inserted)
+      NextNumber++;
+    return MapIter->second;
+  }
+  void clear() {
+    GlobalNumbers.clear();
+  }
+};
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. DataLayout is
+/// used if available. The comparator always fails conservatively (erring on
+/// the side of claiming that two functions are different).
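+///
+/// A minimal usage sketch (mirroring how this pass drives the comparator;
+/// the GlobalNumberState must outlive the comparison):
+///
+///   GlobalNumberState GN;
+///   FunctionComparator FCmp(F1, F2, &GN);
+///   if (FCmp.compare() == 0)
+///     ; // F1 and F2 are equivalent and are candidates for folding.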
+class FunctionComparator {
+public:
+  FunctionComparator(const Function *F1, const Function *F2,
+                     GlobalNumberState* GN)
+      : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
+
+  /// Test whether the two functions have equivalent behaviour.
+  int compare();
+  /// Hash a function. Equivalent functions will have the same hash, and
+  /// unequal functions will have different hashes with high probability.
+  typedef uint64_t FunctionHash;
+  static FunctionHash functionHash(Function &);
+
+private:
+  /// Test whether two basic blocks have equivalent behaviour.
+  int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
+
+  /// Constants comparison.
+  /// It is analogous to lexicographical comparison between hypothetical
+  /// numbers of the following format:
+  /// <bitcastability-trait><raw-bit-contents>
+  ///
+  /// 1. Bitcastability.
+  /// Check whether L's type can be losslessly bitcasted to R's type.
+  /// At this stage, if a lossless bitcast is not possible, the method
+  /// returns -1 or 1, thus also defining which type is greater in the
+  /// context of bitcastability.
+  /// Stage 0: If the types are equal in terms of cmpTypes, then we can go
+  ///          straight to the contents comparison.
+  ///          If the types differ, remember the types comparison result and
+  ///          check whether we can still bitcast the types.
+  /// Stage 1: Types that satisfy the isFirstClassType condition are always
+  ///          greater than others.
+  /// Stage 2: A vector is greater than a non-vector.
+  ///          If both types are vectors, the vector with the greater
+  ///          bitwidth is greater.
+  ///          If both types are vectors with the same bitwidth, then the
+  ///          types are bitcastable, and we can skip the other stages and go
+  ///          to the contents comparison.
+  /// Stage 3: Pointer types are greater than non-pointers. If both types are
+  ///          pointers of the same address space - go to the contents
+  ///          comparison. Different address spaces: the pointer with the
+  ///          greater address space is greater.
+  /// Stage 4: The types are neither vectors nor pointers, and they differ.
+  ///          We don't know how to bitcast them, so we'd better not do it,
+  ///          and return the types comparison result (so it determines the
+  ///          relationship among constants we don't know how to bitcast).
+  ///
+  /// Just for clarity, let's see how the set of constants could look
+  /// on a single dimension axis:
+  ///
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  /// Where: NFCT - Not a FirstClassType
+  ///        FCT - FirstClassType
+  ///
+  /// 2. Compare raw contents.
+  /// This ignores the types and only compares bits from L and R.
+  /// Returns 0 if L and R have equivalent contents,
+  /// -1 or 1 if the values are different.
+  /// Pretty trivial:
+  /// 2.1. If the contents are numbers, compare the numbers.
+  ///      Ints with greater bitwidth are greater. Ints with the same bitwidth
+  ///      are compared by their contents.
+  /// 2.2. "And so on". To avoid discrepancies with the comments, it is
+  ///      perhaps better to read the implementation itself.
+  /// 3. And again about the overall picture.
+  /// Let's look back at what the ordered set of constants will look like:
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  ///
+  /// Now look at what could be inside [FCT, "others"], for example:
+  /// [FCT, "others"] =
+  /// [
+  ///   [double 0.1], [double 1.23],
+  ///   [i32 1], [i32 2],
+  ///   { double 1.0 },       ; StructTyID, NumElements = 1
+  ///   { i32 1 },            ; StructTyID, NumElements = 1
+  ///   { double 1, i32 1 },  ; StructTyID, NumElements = 2
+  ///   { i32 1, double 1 }   ; StructTyID, NumElements = 2
+  /// ]
+  ///
+  /// Let's explain the order. Float numbers will be less than integers,
+  /// simply because of cmpTypes terms: FloatTyID < IntegerTyID.
+  /// Floats (with the same fltSemantics) are sorted according to their value.
+  /// Then come the integers, which, like the floats, can easily be sorted
+  /// among each other.
+  /// The structures are grouped at the tail, again because of their
+  /// TypeID: StructTyID > IntegerTyID > FloatTyID.
+  /// Structures with a greater number of elements are greater. Of structures
+  /// with the same number of elements, the one whose leading elements are
+  /// greater is greater.
+  /// The same logic applies to vectors, arrays and other possible complex
+  /// types.
+  ///
+  /// Bitcastable constants.
+  /// Let's assume that some constant belongs to some group of
+  /// "so-called-equal" values with different types, and at the same time
+  /// belongs to another group of constants with equal types
+  /// and "really" equal values.
+  ///
+  /// Now, prove that this is impossible:
+  ///
+  /// If constant A with type TyA is bitcastable to B with type TyB, then:
+  /// 1. All constants with types equal to TyA are bitcastable to B. Since
+  ///    those should be vectors (if TyA is a vector), pointers
+  ///    (if TyA is a pointer), or else (if TyA equals TyB), those types
+  ///    should be equal to TyB.
+  /// 2. All constants with non-equal, but bitcastable, types to TyA are
+  ///    bitcastable to B.
+  ///    Once again, simply because we allow it for vectors and pointers only.
+  ///    This statement can be expanded as below:
+  /// 2.1. All vectors with a bitwidth equal to vector A's have a bitwidth
+  ///      equal to vector B's, and are thus bitcastable to B as well.
+  /// 2.2. All pointers of the same address space, no matter what they point
+  ///      to, are bitcastable. So if C is a pointer, it can be bitcasted to A
+  ///      and to B.
+  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+  /// QED.
+  ///
+  /// In other words, for pointers and vectors we ignore the top-level type
+  /// and look at their particular properties (bit-width for vectors, and
+  /// address space for pointers).
+  /// If these properties are equal - compare their contents.
+  int cmpConstants(const Constant *L, const Constant *R);
+
+  /// Compares two global values by number. Uses the GlobalNumberState to
+  /// identify the same globals across function calls.
+  int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
+  /// Assign or look up previously assigned numbers for the two values, and
+  /// return the comparison of the assigned numbers. Numbers are assigned in
+  /// the order visited.
+  /// Comparison order:
+  /// Stage 0: A value that is the function itself is always greater than
+  ///          others. If the left and right values are references to their
+  ///          respective functions, then they are equal.
+  /// Stage 1: Constants are greater than non-constants.
+  ///          If both left and right are constants, then the result of
+  ///          cmpConstants is used as the cmpValues result.
+  /// Stage 2: InlineAsm instances are greater than others.
+  ///          If both left and right values are InlineAsm instances, the
+  ///          InlineAsm* pointers are cast to integers and compared as
+  ///          numbers.
+  /// Stage 3: In all other cases we compare the order in which we met these
+  ///          values in their functions. If the right value was met first
+  ///          during scanning, then the left value is greater.
+  ///          In other words, we compare serial numbers; for more details
+  ///          see the comments for sn_mapL and sn_mapR.
+  int cmpValues(const Value *L, const Value *R);
+
+  /// Compare two Instructions for equivalence, similar to
+  /// Instruction::isSameOperationAs but with modifications to the type
+  /// comparison.
+  /// Stages are listed in "most significant stage first" order:
+  /// At each stage below, we compare some left and right operation parts.
+  /// If the parts are non-equal, we assign the parts' comparison result to
+  /// the operation comparison result and exit from the method.
+  /// Otherwise we proceed to the next stage.
+  /// Stages:
+  /// 1. Operation opcodes. Compared as numbers.
+  /// 2. Number of operands.
+  /// 3. Operation types. Compared with the cmpTypes method.
+  /// 4. Compare operation subclass optional data as a stream of bytes:
+  ///    just convert it to integers and call cmpNumbers.
+  /// 5. Compare the operand types of the operation with cmpTypes, in
+  ///    most-significant-operand-first order.
+  /// 6. Last stage. Check operations for some specific attributes.
+  ///    For example, for Load it would be:
+  ///    6.1.Load: volatile (as boolean flag)
+  ///    6.2.Load: alignment (as integer numbers)
+  ///    6.3.Load: synch-scope (as integer numbers)
+  ///    6.4.Load: range metadata (as integer numbers)
+  ///    At this stage it is better to read the code, since it is no more
+  ///    than 10-15 lines per particular instruction and may change from time
+  ///    to time.
+  int cmpOperations(const Instruction *L, const Instruction *R) const;
+
+  /// Compare two GEPs for equivalent pointer arithmetic.
+  /// Parts to be compared for each comparison stage,
+  /// most significant stage first:
+  /// 1. Address space. As numbers.
+  /// 2. Constant offset (using the GEPOperator::accumulateConstantOffset
+  ///    method).
+  /// 3. Pointer operand type (using the cmpTypes method).
+  /// 4. Number of operands.
+  /// 5. Compare operands, using the cmpValues method.
+  int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
+  int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+    return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+  }
+
+  /// cmpTypes - compares two types,
+  /// defines a total ordering on the set of types.
+  ///
+  /// Return values:
+  /// 0 if the types are equal,
+  /// -1 if Left is less than Right,
+  /// +1 if Left is greater than Right.
+  ///
+  /// Description:
+  /// Comparison is broken into stages. As in a lexicographical comparison,
+  /// the stage coming first has higher priority.
+  /// At each stage of the explanation, keep the total ordering properties in
+  /// mind.
+  ///
+  /// 0. Before comparison we coerce pointer types of address space 0 to
+  ///    integer.
+  ///    We also don't bother with the same type on the left and right, and
+  ///    just return 0 in this case.
+  ///
+  /// 1. If the types are of different kinds (different type IDs),
+  ///    return the result of the type ID comparison, treating the IDs as
+  ///    numbers.
+  /// 2. If the types are integers, check that they have the same width. If
+  ///    they are vectors, check that they have the same count and subtype.
+  /// 3. The types have the same ID, so check whether they are one of:
+  ///    * Void
+  ///    * Float
+  ///    * Double
+  ///    * X86_FP80
+  ///    * FP128
+  ///    * PPC_FP128
+  ///    * Label
+  ///    * Metadata
+  ///    We can treat these types as equal whenever their IDs are the same.
+  /// 4. If Left and Right are pointers, return the result of the address
+  ///    space comparison (numbers comparison). We can treat pointer types of
+  ///    the same address space as equal.
+  /// 5. If the types are complex,
+  ///    then both Left and Right are expanded and their element types are
+  ///    checked in the same way. If we get Res != 0 at some stage, return it.
+  ///    Otherwise return 0.
+  /// 6. For all other cases put llvm_unreachable.
+  int cmpTypes(Type *TyL, Type *TyR) const;
+
+  int cmpNumbers(uint64_t L, uint64_t R) const;
+  int cmpAPInts(const APInt &L, const APInt &R) const;
+  int cmpAPFloats(const APFloat &L, const APFloat &R) const;
+  int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+  int cmpMem(StringRef L, StringRef R) const;
+  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+  int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+  int cmpOperandBundlesSchema(const Instruction *L,
+                              const Instruction *R) const;
+
+  // The two functions undergoing comparison.
+  const Function *FnL, *FnR;
+
+  /// Assign serial numbers to values from the left function, and values from
+  /// the right function.
+  /// Explanation:
+  /// When comparing functions, we need to compare the values we meet on the
+  /// left and right sides.
+  /// It is easy to sort things out for external values. They just have to be
+  /// the same value on the left and right.
+  /// But for local values (those introduced inside the function body) we have
+  /// to ensure they were introduced at exactly the same place, and play the
+  /// same role.
+  /// So let's assign a serial number to each value when we meet it for the
+  /// first time. Values met at the same place will get the same serial
+  /// numbers.
+  /// At this point it is worth explaining a few points about the values
+  /// assigned to BBs and other implementation details (see below).
+  ///
+  /// 1. Safety of BB reordering.
+  /// It's safe to change the order of BasicBlocks in a function.
+  /// The relationship with other functions and the serial numbering will not
+  /// change in this case.
+  /// As follows from FunctionComparator::compare(), we do a CFG walk: we
+  /// start from the entry, and then take each terminator. So it doesn't
+  /// matter how the BBs are in fact ordered in the function. And since
+  /// cmpValues is called during this walk, the numbering depends only on how
+  /// the BBs are located inside the CFG.
+  /// So the answer is - yes. We will get the same numbering.
+  ///
+  /// 2. Impossibility to use dominance properties of values.
+  /// If we compare two instruction operands: the first is the usage of local
+  /// variable AL from function FL, and the second is the usage of local
+  /// variable AR from FR, we could compare their origins and check whether
+  /// they are defined at the same place.
+  /// But we are still not able to compare operands of PHI nodes, since those
+  /// could be operands from further BBs we haven't scanned yet.
+  /// So it's impossible to use dominance properties in general.
+  DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+  // The global state we will use.
+  GlobalNumberState* GlobalNumbers;
+};
+
+class FunctionNode {
+  mutable AssertingVH<Function> F;
+  FunctionComparator::FunctionHash Hash;
+public:
+  // Note the hash is recalculated potentially multiple times, but it is
+  // cheap.
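+  // (The FnTree defined later in this file orders FunctionNodes by this hash
+  // first, and falls back to a full FunctionComparator::compare() only when
+  // two hashes collide; see FunctionNodeCmp below.)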
+ FunctionNode(Function *F) + : F(F), Hash(FunctionComparator::functionHash(*F)) {} + Function *getFunc() const { return F; } + FunctionComparator::FunctionHash getHash() const { return Hash; } + + /// Replace the reference to the function F by the function G, assuming their + /// implementations are equal. + void replaceBy(Function *G) const { + F = G; + } + + void release() { F = nullptr; } +}; +} // end anonymous namespace + +int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { + if (L < R) return -1; + if (L > R) return 1; + return 0; +} + +int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { + if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) + return Res; + if (L.ugt(R)) return 1; + if (R.ugt(L)) return -1; + return 0; +} + +int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). + const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); + if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), + APFloat::semanticsPrecision(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), + APFloat::semanticsMaxExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), + APFloat::semanticsMinExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), + APFloat::semanticsSizeInBits(SR))) + return Res; + return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); +} + +int FunctionComparator::cmpMem(StringRef L, StringRef R) const { + // Prevent heavy comparison, compare sizes first. + if (int Res = cmpNumbers(L.size(), R.size())) + return Res; + + // Compare strings lexicographically only when it is necessary: only when + // strings are equal in size. + return L.compare(R); +} + +int FunctionComparator::cmpAttrs(const AttributeSet L, + const AttributeSet R) const { + if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + return Res; + + for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { + AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), + RE = R.end(i); + for (; LI != LE && RI != RE; ++LI, ++RI) { + Attribute LA = *LI; + Attribute RA = *RI; + if (LA < RA) + return -1; + if (RA < LA) + return 1; + } + if (LI != LE) + return 1; + if (RI != RE) + return -1; + } + return 0; +} + +int FunctionComparator::cmpRangeMetadata(const MDNode* L, + const MDNode* R) const { + if (L == R) + return 0; + if (!L) + return -1; + if (!R) + return 1; + // Range metadata is a sequence of numbers. Make sure they are the same + // sequence. + // TODO: Note that as this is metadata, it is possible to drop and/or merge + // this data when considering functions to merge. Thus this comparison would + // return 0 (i.e. equivalent), but merging would become more complicated + // because the ranges would need to be unioned. It is not likely that + // functions differ ONLY in this metadata if they are actually the same + // function semantically. 
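+  // For example (hypothetical metadata), !range !{i32 0, i32 10} and
+  // !range !{i32 0, i32 12} have equal operand counts but differ in the
+  // second bound, so the loop below reports a nonzero result.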
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  for (size_t I = 0; I < L->getNumOperands(); ++I) {
+    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+      return Res;
+  }
+  return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+                                                const Instruction *R) const {
+  ImmutableCallSite LCS(L);
+  ImmutableCallSite RCS(R);
+
+  assert(LCS && RCS && "Must be calls or invokes!");
+  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+  if (int Res =
+          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+    return Res;
+
+  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+    auto OBL = LCS.getOperandBundleAt(i);
+    auto OBR = RCS.getOperandBundleAt(i);
+
+    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+      return Res;
+
+    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+      return Res;
+  }
+
+  return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant can be losslessly bitcasted
+///    to the R type.
+/// 2. Compare constant contents.
+/// For more details see the declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+  Type *TyL = L->getType();
+  Type *TyR = R->getType();
+
+  // Check whether the types are bitcastable. This part is essentially a
+  // re-factored Type::canLosslesslyBitCastTo method, but instead of returning
+  // true/false, it also encodes into the result which type is "less" for us.
+  int TypesRes = cmpTypes(TyL, TyR);
+  if (TypesRes != 0) {
+    // Types are different, but check whether we can bitcast them.
+    if (!TyL->isFirstClassType()) {
+      if (TyR->isFirstClassType())
+        return -1;
+      // Neither TyL nor TyR are values of first class type. Return the
+      // result of comparing the types.
+      return TypesRes;
+    }
+    if (!TyR->isFirstClassType()) {
+      if (TyL->isFirstClassType())
+        return 1;
+      return TypesRes;
+    }
+
+    // Vector -> Vector conversions are always lossless if the two vector
+    // types have the same size, otherwise not.
+    unsigned TyLWidth = 0;
+    unsigned TyRWidth = 0;
+
+    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
+      TyLWidth = VecTyL->getBitWidth();
+    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
+      TyRWidth = VecTyR->getBitWidth();
+
+    if (TyLWidth != TyRWidth)
+      return cmpNumbers(TyLWidth, TyRWidth);
+
+    // Zero bit-width means neither TyL nor TyR are vectors.
+    if (!TyLWidth) {
+      PointerType *PTyL = dyn_cast<PointerType>(TyL);
+      PointerType *PTyR = dyn_cast<PointerType>(TyR);
+      if (PTyL && PTyR) {
+        unsigned AddrSpaceL = PTyL->getAddressSpace();
+        unsigned AddrSpaceR = PTyR->getAddressSpace();
+        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+          return Res;
+      }
+      if (PTyL)
+        return 1;
+      if (PTyR)
+        return -1;
+
+      // TyL and TyR aren't vectors, nor pointers. We don't know how to
+      // bitcast them.
+      return TypesRes;
+    }
+  }
+
+  // OK, types are bitcastable, now check constant contents.
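+  // (For instance, i8* null and i32* null in address space 0 reach this
+  // point with TypesRes == 0, because cmpTypes coerces both pointer types to
+  // the integer pointer type; the null-value checks below then report
+  // equality.)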
+
+  if (L->isNullValue() && R->isNullValue())
+    return TypesRes;
+  if (L->isNullValue() && !R->isNullValue())
+    return 1;
+  if (!L->isNullValue() && R->isNullValue())
+    return -1;
+
+  auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+  auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+  if (GlobalValueL && GlobalValueR) {
+    return cmpGlobalValues(GlobalValueL, GlobalValueR);
+  }
+
+  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+    return Res;
+
+  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+    const auto *SeqR = cast<ConstantDataSequential>(R);
+    // This handles ConstantDataArray and ConstantDataVector. Note that we
+    // compare the two raw data arrays, which might differ depending on the
+    // host endianness. This isn't a problem though, because the endianness
+    // of a module will affect the order of the constants, but this order is
+    // the same for a given input module and host platform.
+    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+  }
+
+  switch (L->getValueID()) {
+  case Value::UndefValueVal:
+  case Value::ConstantTokenNoneVal:
+    return TypesRes;
+  case Value::ConstantIntVal: {
+    const APInt &LInt = cast<ConstantInt>(L)->getValue();
+    const APInt &RInt = cast<ConstantInt>(R)->getValue();
+    return cmpAPInts(LInt, RInt);
+  }
+  case Value::ConstantFPVal: {
+    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+    return cmpAPFloats(LAPF, RAPF);
+  }
+  case Value::ConstantArrayVal: {
+    const ConstantArray *LA = cast<ConstantArray>(L);
+    const ConstantArray *RA = cast<ConstantArray>(R);
+    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (uint64_t i = 0; i < NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+                                 cast<Constant>(RA->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantStructVal: {
+    const ConstantStruct *LS = cast<ConstantStruct>(L);
+    const ConstantStruct *RS = cast<ConstantStruct>(R);
+    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (unsigned i = 0; i != NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+                                 cast<Constant>(RS->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantVectorVal: {
+    const ConstantVector *LV = cast<ConstantVector>(L);
+    const ConstantVector *RV = cast<ConstantVector>(R);
+    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (uint64_t i = 0; i < NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+                                 cast<Constant>(RV->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantExprVal: {
+    const ConstantExpr *LE = cast<ConstantExpr>(L);
+    const ConstantExpr *RE = cast<ConstantExpr>(R);
+    unsigned NumOperandsL = LE->getNumOperands();
+    unsigned NumOperandsR = RE->getNumOperands();
+    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+      return Res;
+    for (unsigned i = 0; i < NumOperandsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+                                 cast<Constant>(RE->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::BlockAddressVal: {
+    const BlockAddress *LBA = cast<BlockAddress>(L);
+    const BlockAddress *RBA = cast<BlockAddress>(R);
+    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+      return Res;
+    if (LBA->getFunction() == RBA->getFunction()) {
+      // They are BBs in the same function. Order by which comes first in the
+      // BB order of the function. This order is deterministic.
+      Function* F = LBA->getFunction();
+      BasicBlock *LBB = LBA->getBasicBlock();
+      BasicBlock *RBB = RBA->getBasicBlock();
+      if (LBB == RBB)
+        return 0;
+      for(BasicBlock &BB : F->getBasicBlockList()) {
+        if (&BB == LBB) {
+          assert(&BB != RBB);
+          return -1;
+        }
+        if (&BB == RBB)
+          return 1;
+      }
+      llvm_unreachable("Basic Block Address does not point to a basic block "
+                       "in its function.");
+      return -1;
+    } else {
+      // cmpValues said the functions are the same. So because they aren't
+      // literally the same pointer, they must respectively be the left and
+      // right functions.
+      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+      // cmpValues will tell us if these are equivalent BasicBlocks, in the
+      // context of their respective functions.
+      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+    }
+  }
+  default: // Unknown constant, abort.
+    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+    llvm_unreachable("Constant ValueID not recognized.");
+    return -1;
+  }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+  return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
+}
+
+/// cmpTypes - compares two types,
+/// defines a total ordering on the set of types.
+/// See the method declaration comments for more details.
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
+  PointerType *PTyL = dyn_cast<PointerType>(TyL);
+  PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+  const DataLayout &DL = FnL->getParent()->getDataLayout();
+  if (PTyL && PTyL->getAddressSpace() == 0)
+    TyL = DL.getIntPtrType(TyL);
+  if (PTyR && PTyR->getAddressSpace() == 0)
+    TyR = DL.getIntPtrType(TyR);
+
+  if (TyL == TyR)
+    return 0;
+
+  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+    return Res;
+
+  switch (TyL->getTypeID()) {
+  default:
+    llvm_unreachable("Unknown type!");
+    // Fall through in Release mode.
+  case Type::IntegerTyID:
+    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+                      cast<IntegerType>(TyR)->getBitWidth());
+  case Type::VectorTyID: {
+    VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
+    if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
+      return Res;
+    return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
+  }
+  // If TyL and TyR were the same type, the TyL == TyR check above would
+  // already have returned 0, because types are uniqued.
+  case Type::VoidTyID:
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+  case Type::MetadataTyID:
+  case Type::TokenTyID:
+    return 0;
+
+  case Type::PointerTyID: {
+    assert(PTyL && PTyR && "Both types must be pointers here.");
+    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+  }
+
+  case Type::StructTyID: {
+    StructType *STyL = cast<StructType>(TyL);
+    StructType *STyR = cast<StructType>(TyR);
+    if (STyL->getNumElements() != STyR->getNumElements())
+      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+    if (STyL->isPacked() != STyR->isPacked())
+      return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+      if (int Res = cmpTypes(STyL->getElementType(i),
+                             STyR->getElementType(i)))
+        return Res;
+    }
+    return 0;
+  }
+
+  case Type::FunctionTyID: {
+    FunctionType *FTyL = cast<FunctionType>(TyL);
+    FunctionType *FTyR = cast<FunctionType>(TyR);
+    if (FTyL->getNumParams() != FTyR->getNumParams())
+      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
+
+    if (FTyL->isVarArg() != FTyR->isVarArg())
+      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
+
+    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
+      return Res;
+
+    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
+        return Res;
+    }
+    return 0;
+  }
+
+  case Type::ArrayTyID: {
+    ArrayType *ATyL = cast<ArrayType>(TyL);
+    ArrayType *ATyR = cast<ArrayType>(TyR);
+    if (ATyL->getNumElements() != ATyR->getNumElements())
+      return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
+    return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
+  }
+  }
+}
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+// Read the method declaration comments for more details.
+int FunctionComparator::cmpOperations(const Instruction *L,
+                                      const Instruction *R) const {
+  // Differences from Instruction::isSameOperationAs:
+  // * replace the type comparison with calls to cmpTypes.
+  // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top.
+  // * because of the above, we don't test for the tail bit on calls later on.
+  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+    return Res;
+
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+
+  if (int Res = cmpTypes(L->getType(), R->getType()))
+    return Res;
+
+  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+                           R->getRawSubclassOptionalData()))
+    return Res;
+
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+    if (int Res = cmpTypes(AI->getAllocatedType(),
+                           cast<AllocaInst>(R)->getAllocatedType()))
+      return Res;
+    if (int Res =
+            cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
+      return Res;
+  }
+
+  // We have two instructions of identical opcode and #operands. Check to see
+  // if all operands are the same type.
+  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+    if (int Res =
+            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+      return Res;
+  }
+
+  // Check special state that is a part of some instructions.
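+  // For example (hypothetical IR), 'load atomic i32, i32* %p acquire,
+  // align 4' and 'load i32, i32* %p, align 4' agree on opcode, types and
+  // operands, and are distinguished only by the ordering check below.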
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + return Res; + if (int Res = + cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + return Res; + return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), + cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + } + if (const CmpInst *CI = dyn_cast<CmpInst>(L)) + return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); + if (const CallInst *CI = dyn_cast<CallInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<CallInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(CI, R)) + return Res; + return cmpRangeMetadata( + CI->getMetadata(LLVMContext::MD_range), + cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) { + if (int Res = cmpNumbers(II->getCallingConv(), + cast<InvokeInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(II, R)) + return Res; + return cmpRangeMetadata( + II->getMetadata(LLVMContext::MD_range), + cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { + ArrayRef<unsigned> LIndices = IVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { + ArrayRef<unsigned> LIndices = EVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { + if (int Res = + cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + } + + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast<AtomicCmpXchgInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(CXI->isWeak(), + cast<AtomicCmpXchgInst>(R)->isWeak())) + return Res; + if (int Res = cmpNumbers(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + return Res; + if (int Res = cmpNumbers(CXI->getFailureOrdering(), + 
cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSynchScope(), + cast<AtomicCmpXchgInst>(R)->getSynchScope()); + } + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast<AtomicRMWInst>(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast<AtomicRMWInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(RMWI->getOrdering(), + cast<AtomicRMWInst>(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSynchScope(), + cast<AtomicRMWInst>(R)->getSynchScope()); + } + return 0; +} + +// Determine whether two GEP operations perform the same underlying arithmetic. +// Read method declaration comments for more details. +int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, + const GEPOperator *GEPR) { + + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. + const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) + return Res; + + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; + + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; + } + + return 0; +} + +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + llvm_unreachable("InlineAsm blocks were not uniqued."); + return 0; +} + +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) { + // Catch self-reference case. 
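+  // (E.g. in a pair of directly recursive functions, the callee operand of
+  // the recursive call is the containing function itself; FnL on the left
+  // must then correspond to FnR on the right.)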
+ if (L == FnL) { + if (R == FnR) + return 0; + return -1; + } + if (R == FnR) { + if (L == FnL) + return 0; + return 1; + } + + const Constant *ConstL = dyn_cast<Constant>(L); + const Constant *ConstR = dyn_cast<Constant>(R); + if (ConstL && ConstR) { + if (L == R) + return 0; + return cmpConstants(ConstL, ConstR); + } + + if (ConstL) + return 1; + if (ConstR) + return -1; + + const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); + const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); + + if (InlineAsmL && InlineAsmR) + return cmpInlineAsm(InlineAsmL, InlineAsmR); + if (InlineAsmL) + return 1; + if (InlineAsmR) + return -1; + + auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), + RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); + + return cmpNumbers(LeftSN.first->second, RightSN.first->second); +} +// Test whether two basic blocks have equivalent behaviour. +int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); + + do { + if (int Res = cmpValues(&*InstL, &*InstR)) + return Res; + + const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL); + const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR); + + if (GEPL && !GEPR) + return 1; + if (GEPR && !GEPL) + return -1; + + if (GEPL && GEPR) { + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + if (int Res = cmpGEPs(GEPL, GEPR)) + return Res; + } else { + if (int Res = cmpOperations(&*InstL, &*InstR)) + return Res; + assert(InstL->getNumOperands() == InstR->getNumOperands()); + + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + // cmpValues should ensure this is true. + assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); + } + } + + ++InstL, ++InstR; + } while (InstL != InstLE && InstR != InstRE); + + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; +} + +// Test whether the two functions have equivalent behaviour. +int FunctionComparator::compare() { + sn_mapL.clear(); + sn_mapR.clear(); + + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; + + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; + + if (FnL->hasGC()) { + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) + return Res; + } + + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; + + if (FnL->hasSection()) { + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) + return Res; + } + + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; + + // TODO: if it's internal and only used in direct calls, we could handle this + // case too. + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; + + if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; + + assert(FnL->arg_size() == FnR->arg_size() && + "Identically typed functions have different numbers of args!"); + + // Visit the arguments so that they get enumerated in the order they're + // passed in. 
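+  // (This seeds sn_mapL and sn_mapR so that the i-th argument on both sides
+  // receives serial number i before any basic block is compared.)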
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(&*ArgLI, &*ArgRI) != 0) + llvm_unreachable("Arguments repeat!"); + } + + // We do a CFG-ordered walk since the actual ordering of the blocks in the + // linked list is immaterial. Our walk starts at the entry block for both + // functions, then takes each block from each terminator in order. As an + // artifact, this also means that unreachable blocks are ignored. + SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. + + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); + + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); + + if (int Res = cmpValues(BBL, BBR)) + return Res; + + if (int Res = cmpBasicBlocks(BBL, BBR)) + return Res; + + const TerminatorInst *TermL = BBL->getTerminator(); + const TerminatorInst *TermR = BBR->getTerminator(); + + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i)).second) + continue; + + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); + } + } + return 0; +} + +namespace { +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. +class HashAccumulator64 { + uint64_t Hash; +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { + Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + } + // No finishing is required, because the entire hash value is used. + uint64_t getHash() { return Hash; } +}; +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector<const BasicBlock *, 8> BBs; + SmallSet<const BasicBlock *, 16> VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." 
+  // (BB and opcode sequence).
+  BBs.push_back(&F.getEntryBlock());
+  VisitedBBs.insert(BBs[0]);
+  while (!BBs.empty()) {
+    const BasicBlock *BB = BBs.pop_back_val();
+    // This random value acts as a block header, as otherwise the partition
+    // of opcodes into BBs wouldn't affect the hash, only the order of the
+    // opcodes.
+    H.add(45798);
+    for (auto &Inst : *BB) {
+      H.add(Inst.getOpcode());
+    }
+    const TerminatorInst *Term = BB->getTerminator();
+    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+        continue;
+      BBs.push_back(Term->getSuccessor(i));
+    }
+  }
+  return H.getHash();
+}
+
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine
+/// code, by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+  static char ID;
+  MergeFunctions()
+    : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
+      HasGlobalAliases(false) {
+    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+
+private:
+  // The function comparison operator is provided here so that FunctionNodes
+  // do not need to become larger with another pointer.
+  class FunctionNodeCmp {
+    GlobalNumberState* GlobalNumbers;
+  public:
+    FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+    bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
+      // Order first by hashes, then full function comparison.
+      if (LHS.getHash() != RHS.getHash())
+        return LHS.getHash() < RHS.getHash();
+      FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
+      return FCmp.compare() == -1;
+    }
+  };
+  typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+
+  GlobalNumberState GlobalNumbers;
+
+  /// A work queue of functions that may have been modified and should be
+  /// analyzed again.
+  std::vector<WeakVH> Deferred;
+
+  /// Checks the rules of the order relation introduced on the function set.
+  /// Returns true if the sanity check has been passed, and false if it
+  /// failed.
+  bool doSanityCheck(std::vector<WeakVH> &Worklist);
+
+  /// Insert a function into the FnTree, or merge it away if it's equal to
+  /// one that's already present.
+  bool insert(Function *NewFunction);
+
+  /// Remove a Function from the FnTree and queue it up for a second sweep of
+  /// analysis.
+  void remove(Function *F);
+
+  /// Find the functions that use this Value and remove them from FnTree and
+  /// queue the functions.
+  void removeUsers(Value *V);
+
+  /// Replace all direct calls of Old with calls of New. Will bitcast New if
+  /// necessary to make types match.
+  void replaceDirectCallers(Function *Old, Function *New);
+
+  /// Merge two equivalent functions. Upon completion, G may be deleted, or
+  /// may be converted into a thunk. In either case, it should never be
+  /// visited again.
+  void mergeTwoFunctions(Function *F, Function *G);
+
+  /// Replace G with a thunk or an alias to F. Deletes G.
+  void writeThunkOrAlias(Function *F, Function *G);
+
+  /// Replace G with a simple tail call to bitcast(F). Also replace direct
+  /// uses of G with bitcast(F). Deletes G.
+  void writeThunk(Function *F, Function *G);
+
+  /// Replace G with an alias to F. Deletes G.
+  void writeAlias(Function *F, Function *G);
+
+  /// Replace function F with function G in the function tree.
+ void replaceFunctionInTree(const FunctionNode &FN, Function *G); + + /// The set of all distinct functions. Use the insert() and remove() methods + /// to modify it. The map allows efficient lookup and deferring of Functions. + FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them + // in the FnTree. This must be updated carefully whenever the FnTree is + // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid + // dangling iterators into FnTree. The invariant that preserves this is that + // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. + ValueMap<Function*, FnTreeType::iterator> FNodesInTree; + + /// Whether or not the target supports global aliases. + bool HasGlobalAliases; +}; + +} // end anonymous namespace + +char MergeFunctions::ID = 0; +INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) + +ModulePass *llvm::createMergeFunctionsPass() { + return new MergeFunctions(); +} + +bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { + if (const unsigned Max = NumFunctionsForSanityCheck) { + unsigned TripleNumber = 0; + bool Valid = true; + + dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + + unsigned i = 0; + for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + I != E && i < Max; ++I, ++i) { + unsigned j = i; + for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + Function *F1 = cast<Function>(*I); + Function *F2 = cast<Function>(*J); + int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); + int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare(); + + // If F1 <= F2, then F2 >= F1, otherwise report failure. + if (Res1 != -Res2) { + dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + << "\n"; + F1->dump(); + F2->dump(); + Valid = false; + } + + if (Res1 == 0) + continue; + + unsigned k = j; + for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + ++k, ++K, ++TripleNumber) { + if (K == J) + continue; + + Function *F3 = cast<Function>(*K); + int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare(); + int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare(); + + bool Transitive = true; + + if (Res1 != 0 && Res1 == Res4) { + // F1 > F2, F2 > F3 => F1 > F3 + Transitive = Res3 == Res1; + } else if (Res3 != 0 && Res3 == -Res4) { + // F1 > F3, F3 > F2 => F1 > F2 + Transitive = Res3 == Res1; + } else if (Res4 != 0 && -Res3 == Res4) { + // F2 > F3, F3 > F1 => F2 > F1 + Transitive = Res4 == -Res1; + } + + if (!Transitive) { + dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + << TripleNumber << "\n"; + dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " + << Res4 << "\n"; + F1->dump(); + F2->dump(); + F3->dump(); + Valid = false; + } + } + } + } + + dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; + return Valid; + } + return true; +} + +bool MergeFunctions::runOnModule(Module &M) { + bool Changed = false; + + // All functions in the module, ordered by hash. Functions with a unique + // hash value are easily eliminated. 
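+  // For example, two otherwise-identical functions that differ only in a
+  // constant operand or a call target hash identically (functionHash ignores
+  // operands) and stay in the worklist, while a function with a unique
+  // opcode/CFG shape is dropped immediately.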
+  std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+      HashedFuncs;
+  for (Function &Func : M) {
+    if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+      HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+    }
+  }
+
+  std::stable_sort(
+      HashedFuncs.begin(), HashedFuncs.end(),
+      [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
+         const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
+        return a.first < b.first;
+      });
+
+  auto S = HashedFuncs.begin();
+  for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
+    // If the hash value matches the previous value or the next one, we must
+    // consider merging it. Otherwise it is dropped and never considered again.
+    if ((I != S && std::prev(I)->first == I->first) ||
+        (std::next(I) != IE && std::next(I)->first == I->first)) {
+      Deferred.push_back(WeakVH(I->second));
+    }
+  }
+
+  do {
+    std::vector<WeakVH> Worklist;
+    Deferred.swap(Worklist);
+
+    DEBUG(doSanityCheck(Worklist));
+
+    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+    // Insert only strong functions and merge them. Strong function merging
+    // always deletes one of them.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          !F->mayBeOverridden()) {
+        Changed |= insert(F);
+      }
+    }
+
+    // Insert only weak functions and merge them. Handling these second lets
+    // us create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // thunks to it which are identical but not mergeable.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          F->mayBeOverridden()) {
+        Changed |= insert(F);
+      }
+    }
+    DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
+  } while (!Deferred.empty());
+
+  FnTree.clear();
+  GlobalNumbers.clear();
+
+  return Changed;
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+  for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
+    Use *U = &*UI;
+    ++UI;
+    CallSite CS(U->getUser());
+    if (CS && CS.isCallee(U)) {
+      // Transfer the called function's attributes to the call site. Due to the
+      // bitcast we will 'lose' ABI changing attributes because the 'called
+      // function' is no longer a Function* but the bitcast. Code that looks up
+      // the attributes from the called function will fail.
+
+      // FIXME: This is not actually true, at least not anymore. The call site
+      // will always have the same ABI affecting attributes as the callee,
+      // because otherwise the original input has UB. Note that Old and New
+      // always have matching ABI, so no attributes need to be changed.
+      // Transferring other attributes may help other optimizations, but that
+      // should be done uniformly and not in this ad-hoc way.
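+      // The code below therefore re-applies New's return and parameter
+      // attributes to the rewritten call site. (E.g., a zeroext parameter
+      // attribute on New would otherwise be invisible through the bitcast.)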
+      auto &Context = New->getContext();
+      auto NewFuncAttrs = New->getAttributes();
+      auto CallSiteAttrs = CS.getAttributes();
+
+      CallSiteAttrs = CallSiteAttrs.addAttributes(
+          Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
+
+      for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
+        AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
+        if (Attrs.getNumSlots())
+          CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
+      }
+
+      CS.setAttributes(CallSiteAttrs);
+
+      remove(CS.getInstruction()->getParent()->getParent());
+      U->set(BitcastNew);
+    }
+  }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+  if (HasGlobalAliases && G->hasUnnamedAddr()) {
+    if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+        G->hasWeakLinkage()) {
+      writeAlias(F, G);
+      return;
+    }
+  }
+
+  writeThunk(F, G);
+}
+
+// Helper for writeThunk: selects the proper cast operation. A bit simpler
+// than CastInst::getCastOpcode.
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+  Type *SrcTy = V->getType();
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    Value *Result = UndefValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element = createCast(
+          Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
+          DestTy->getStructElementType(I));
+
+      Result =
+          Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
+  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+    return Builder.CreateIntToPtr(V, DestTy);
+  else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+    return Builder.CreatePtrToInt(V, DestTy);
+  else
+    return Builder.CreateBitCast(V, DestTy);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
+  if (!G->mayBeOverridden()) {
+    // Redirect direct callers of G to F.
+    replaceDirectCallers(G, F);
+  }
+
+  // If G was internal then we may have replaced all uses of G with F. If so,
+  // stop here and delete G. There's no need for a thunk.
+  if (G->hasLocalLinkage() && G->use_empty()) {
+    G->eraseFromParent();
+    return;
+  }
+
+  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+                                    G->getParent());
+  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+  IRBuilder<false> Builder(BB);
+
+  SmallVector<Value *, 16> Args;
+  unsigned i = 0;
+  FunctionType *FFTy = F->getFunctionType();
+  for (Argument &AI : NewG->args()) {
+    Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
+    ++i;
+  }
+
+  CallInst *CI = Builder.CreateCall(F, Args);
+  CI->setTailCall();
+  CI->setCallingConv(F->getCallingConv());
+  CI->setAttributes(F->getAttributes());
+  if (NewG->getReturnType()->isVoidTy()) {
+    Builder.CreateRetVoid();
+  } else {
+    Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
+  }
+
+  NewG->copyAttributesFrom(G);
+  NewG->takeName(G);
+  removeUsers(G);
+  G->replaceAllUsesWith(NewG);
+  G->eraseFromParent();
+
+  DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+  ++NumThunksWritten;
+}
+
+// Replace G with an alias to F and delete G.
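+// Conceptually, where writeThunk emits a real body (a tail call through a
+// bitcast of F), writeAlias only emits a GlobalAlias symbol for G that
+// resolves to F at link time, so no extra code is generated.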
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+  auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
+  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+  GA->takeName(G);
+  GA->setVisibility(G->getVisibility());
+  removeUsers(G);
+  G->replaceAllUsesWith(GA);
+  G->eraseFromParent();
+
+  DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+  ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+  if (F->mayBeOverridden()) {
+    assert(G->mayBeOverridden());
+
+    // Make them both thunks to the same internal function.
+    Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+                                   F->getParent());
+    H->copyAttributesFrom(F);
+    H->takeName(F);
+    removeUsers(F);
+    F->replaceAllUsesWith(H);
+
+    unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+
+    if (HasGlobalAliases) {
+      writeAlias(F, G);
+      writeAlias(F, H);
+    } else {
+      writeThunk(F, G);
+      writeThunk(F, H);
+    }
+
+    F->setAlignment(MaxAlignment);
+    F->setLinkage(GlobalValue::PrivateLinkage);
+    ++NumDoubleWeak;
+  } else {
+    writeThunkOrAlias(F, G);
+  }
+
+  ++NumFunctionsMerged;
+}
+
+/// Replace function F by function G.
+void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
+                                           Function *G) {
+  Function *F = FN.getFunc();
+  assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
+         "The two functions must be equal");
+
+  auto I = FNodesInTree.find(F);
+  assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
+  assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
+
+  FnTreeType::iterator IterToFNInFnTree = I->second;
+  assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
+  // Remove F -> FN and insert G -> FN.
+  FNodesInTree.erase(I);
+  FNodesInTree.insert({G, IterToFNInFnTree});
+  // Replace F with G in FN, which is stored inside the FnTree.
+  FN.replaceBy(G);
+}
+
+// Insert a ComparableFunction into the FnTree, or merge it away if equal to
+// one that was already inserted.
+bool MergeFunctions::insert(Function *NewFunction) {
+  std::pair<FnTreeType::iterator, bool> Result =
+      FnTree.insert(FunctionNode(NewFunction));
+
+  if (Result.second) {
+    assert(FNodesInTree.count(NewFunction) == 0);
+    FNodesInTree.insert({NewFunction, Result.first});
+    DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
+    return false;
+  }
+
+  const FunctionNode &OldF = *Result.first;
+
+  // Don't merge tiny functions, since merging can just end up making the
+  // function larger.
+  // FIXME: Should still merge them if they are unnamed_addr and produce an
+  // alias.
+  if (NewFunction->size() == 1) {
+    if (NewFunction->front().size() <= 2) {
+      DEBUG(dbgs() << NewFunction->getName()
+                   << " is too small to bother merging\n");
+      return false;
+    }
+  }
+
+  // Impose a total order (by name) on the replacement of functions. This is
+  // important when operating on more than one module independently to prevent
+  // cycles of thunks calling each other when the modules are linked together.
+  //
+  // When one function is weak and the other is strong there is an order
+  // imposed already. We process strong functions before weak functions.
+  if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) ||
+      (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden()))
+    if (OldF.getFunc()->getName() > NewFunction->getName()) {
+      // Swap the two functions.
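+      // E.g. if "b" is already in the tree and an identical "a" arrives, "a"
+      // takes b's place as the canonical copy and "b" becomes the one merged
+      // away, so the survivor is the same regardless of insertion order.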
+      Function *F = OldF.getFunc();
+      replaceFunctionInTree(*Result.first, NewFunction);
+      NewFunction = F;
+      assert(OldF.getFunc() != F && "Must have swapped the functions.");
+    }
+
+  // Never thunk a strong function to a weak function.
+  assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
+
+  DEBUG(dbgs() << "  " << OldF.getFunc()->getName()
+               << " == " << NewFunction->getName() << '\n');
+
+  Function *DeleteF = NewFunction;
+  mergeTwoFunctions(OldF.getFunc(), DeleteF);
+  return true;
+}
+
+// Remove a function from FnTree. If it was already in FnTree, add
+// it to Deferred so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+  auto I = FNodesInTree.find(F);
+  if (I != FNodesInTree.end()) {
+    DEBUG(dbgs() << "Deferred " << F->getName() << ".\n");
+    FnTree.erase(I->second);
+    // I->second has been invalidated, remove it from the FNodesInTree map to
+    // preserve the invariant.
+    FNodesInTree.erase(I);
+    Deferred.emplace_back(F);
+  }
+}
+
+// For each instruction used by the value, remove() the function that contains
+// the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+  std::vector<Value *> Worklist;
+  Worklist.push_back(V);
+  SmallSet<Value*, 8> Visited;
+  Visited.insert(V);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.back();
+    Worklist.pop_back();
+
+    for (User *U : V->users()) {
+      if (Instruction *I = dyn_cast<Instruction>(U)) {
+        remove(I->getParent()->getParent());
+      } else if (isa<GlobalValue>(U)) {
+        // do nothing
+      } else if (Constant *C = dyn_cast<Constant>(U)) {
+        // Queue each user of the constant the first time we see it.
+        for (User *UU : C->users()) {
+          if (Visited.insert(UU).second)
+            Worklist.push_back(UU);
+        }
+      }
+    }
+  }
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 0000000..0c5c84b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,183 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
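+// For example, a function shaped like
+//
+//   void f() { if (cond) return; /* large body */ }
+//
+// is split so that the cheap entry test (and the early return) can be inlined
+// into every caller, while the large body is extracted into a separate
+// function that is only called when control falls through the test.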
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +using namespace llvm; + +#define DEBUG_TYPE "partialinlining" + +STATISTIC(NumPartialInlined, "Number of functions partially inlined"); + +namespace { + struct PartialInliner : public ModulePass { + void getAnalysisUsage(AnalysisUsage &AU) const override { } + static char ID; // Pass identification, replacement for typeid + PartialInliner() : ModulePass(ID) { + initializePartialInlinerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module& M) override; + + private: + Function* unswitchFunction(Function* F); + }; +} + +char PartialInliner::ID = 0; +INITIALIZE_PASS(PartialInliner, "partial-inliner", + "Partial Inliner", false, false) + +ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } + +Function* PartialInliner::unswitchFunction(Function* F) { + // First, verify that this function is an unswitching candidate... + BasicBlock *entryBlock = &F->front(); + BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); + if (!BR || BR->isUnconditional()) + return nullptr; + + BasicBlock* returnBlock = nullptr; + BasicBlock* nonReturnBlock = nullptr; + unsigned returnCount = 0; + for (BasicBlock *BB : successors(entryBlock)) { + if (isa<ReturnInst>(BB->getTerminator())) { + returnBlock = BB; + returnCount++; + } else + nonReturnBlock = BB; + } + + if (returnCount != 1) + return nullptr; + + // Clone the function, so that we can hack away on it. + ValueToValueMapTy VMap; + Function* duplicateFunction = CloneFunction(F, VMap, + /*ModuleLevelChanges=*/false); + duplicateFunction->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(duplicateFunction); + BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); + + // Go ahead and update all uses to the duplicate, so that we can just + // use the inliner functionality when we're done hacking. + F->replaceAllUsesWith(duplicateFunction); + + // Special hackery is needed with PHI nodes that have inputs from more than + // one extracted block. For simplicity, just split the PHIs into a two-level + // sequence of PHIs, some of which will go in the extracted region, and some + // of which will go outside. + BasicBlock* preReturn = newReturnBlock; + newReturnBlock = newReturnBlock->splitBasicBlock( + newReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = preReturn->begin(); + Instruction *Ins = &newReturnBlock->front(); + while (I != preReturn->end()) { + PHINode* OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) break; + + PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); + OldPhi->replaceAllUsesWith(retPhi); + Ins = newReturnBlock->getFirstNonPHI(); + + retPhi->addIncoming(&*I, preReturn); + retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), + newEntryBlock); + OldPhi->removeIncomingValue(newEntryBlock); + + ++I; + } + newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); + + // Gather up the blocks that we're going to extract. 
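+  // That is, every block of the clone except the duplicated entry block and
+  // the (split) return block: the entire non-returning side of the branch.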
+ std::vector<BasicBlock*> toExtract; + toExtract.push_back(newNonReturnBlock); + for (Function::iterator FI = duplicateFunction->begin(), + FE = duplicateFunction->end(); FI != FE; ++FI) + if (&*FI != newEntryBlock && &*FI != newReturnBlock && + &*FI != newNonReturnBlock) + toExtract.push_back(&*FI); + + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.recalculate(*duplicateFunction); + + // Extract the body of the if. + Function* extractedFunction + = CodeExtractor(toExtract, &DT).extractCodeRegion(); + + InlineFunctionInfo IFI; + + // Inline the top-level if test into all callers. + std::vector<User *> Users(duplicateFunction->user_begin(), + duplicateFunction->user_end()); + for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); + UI != UE; ++UI) + if (CallInst *CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI, IFI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II, IFI); + + // Ditch the duplicate, since we're done with it, and rewrite all remaining + // users (function pointers, etc.) back to the original function. + duplicateFunction->replaceAllUsesWith(F); + duplicateFunction->eraseFromParent(); + + ++NumPartialInlined; + + return extractedFunction; +} + +bool PartialInliner::runOnModule(Module& M) { + std::vector<Function*> worklist; + worklist.reserve(M.size()); + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) + if (!FI->use_empty() && !FI->isDeclaration()) + worklist.push_back(&*FI); + + bool changed = false; + while (!worklist.empty()) { + Function* currFunc = worklist.back(); + worklist.pop_back(); + + if (currFunc->use_empty()) continue; + + bool recursive = false; + for (User *U : currFunc->users()) + if (Instruction* I = dyn_cast<Instruction>(U)) + if (I->getParent()->getParent() == currFunc) { + recursive = true; + break; + } + if (recursive) continue; + + + if (Function* newFunc = unswitchFunction(currFunc)) { + worklist.push_back(newFunc); + changed = true; + } + + } + + return changed; +} diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 0000000..9876efa --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -0,0 +1,733 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. 
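+//
+// A typical use looks roughly like this (sketch only; see the builder's
+// fields and populate*() methods below):
+//
+//   PassManagerBuilder PMB;
+//   PMB.OptLevel = 2;
+//   PMB.Inliner = createFunctionInliningPass();
+//   legacy::PassManager MPM;
+//   PMB.populateModulePassManager(MPM);
+//   MPM.run(M);                        // M is some llvm::Module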
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/FunctionInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Vectorize.h" + +using namespace llvm; + +static cl::opt<bool> +RunLoopVectorization("vectorize-loops", cl::Hidden, + cl::desc("Run the Loop vectorization passes")); + +static cl::opt<bool> +RunSLPVectorization("vectorize-slp", cl::Hidden, + cl::desc("Run the SLP vectorization passes")); + +static cl::opt<bool> +RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, + cl::desc("Run the BB vectorization passes")); + +static cl::opt<bool> +UseGVNAfterVectorization("use-gvn-after-vectorization", + cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); + +static cl::opt<bool> ExtraVectorizerPasses( + "extra-vectorizer-passes", cl::init(false), cl::Hidden, + cl::desc("Run cleanup optimization passes after vectorization.")); + +static cl::opt<bool> UseNewSROA("use-new-sroa", + cl::init(true), cl::Hidden, + cl::desc("Enable the new, experimental SROA pass")); + +static cl::opt<bool> +RunLoopRerolling("reroll-loops", cl::Hidden, + cl::desc("Run the loop rerolling pass")); + +static cl::opt<bool> +RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), + cl::desc("Run the float2int (float demotion) pass")); + +static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), + cl::Hidden, + cl::desc("Run the load combining pass")); + +static cl::opt<bool> +RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", + cl::init(true), cl::Hidden, + cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " + "vectorizer instead of before")); + +static cl::opt<bool> UseCFLAA("use-cfl-aa", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis")); + +static cl::opt<bool> +EnableMLSM("mlsm", cl::init(true), cl::Hidden, + cl::desc("Enable motion of merged load and store")); + +static cl::opt<bool> EnableLoopInterchange( + "enable-loopinterchange", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopInterchange Pass")); + +static cl::opt<bool> EnableLoopDistribute( + "enable-loop-distribute", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopDistribution Pass")); + +static cl::opt<bool> EnableNonLTOGlobalsModRef( + "enable-non-lto-gmr", cl::init(true), cl::Hidden, + cl::desc( + "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); + +static cl::opt<bool> EnableLoopLoadElim( + "enable-loop-load-elim", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 
+      2;
+  SizeLevel = 0;
+  LibraryInfo = nullptr;
+  Inliner = nullptr;
+  FunctionIndex = nullptr;
+  DisableUnitAtATime = false;
+  DisableUnrollLoops = false;
+  BBVectorize = RunBBVectorization;
+  SLPVectorize = RunSLPVectorization;
+  LoopVectorize = RunLoopVectorization;
+  RerollLoops = RunLoopRerolling;
+  LoadCombine = RunLoadCombine;
+  DisableGVNLoadPRE = false;
+  VerifyInput = false;
+  VerifyOutput = false;
+  MergeFunctions = false;
+  PrepareForLTO = false;
+}
+
+PassManagerBuilder::~PassManagerBuilder() {
+  delete LibraryInfo;
+  delete Inliner;
+}
+
+/// Set of global extensions, automatically added as part of the standard set.
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
+   PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+
+void PassManagerBuilder::addGlobalExtension(
+    PassManagerBuilder::ExtensionPointTy Ty,
+    PassManagerBuilder::ExtensionFn Fn) {
+  GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
+  Extensions.push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
+                                           legacy::PassManagerBase &PM) const {
+  for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
+    if ((*GlobalExtensions)[i].first == ETy)
+      (*GlobalExtensions)[i].second(*this, PM);
+  for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
+    if (Extensions[i].first == ETy)
+      Extensions[i].second(*this, PM);
+}
+
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+    legacy::PassManagerBase &PM) const {
+  // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+  // BasicAliasAnalysis wins if they disagree. This is intended to help
+  // support "obvious" type-punning idioms.
+  if (UseCFLAA)
+    PM.add(createCFLAAWrapperPass());
+  PM.add(createTypeBasedAAWrapperPass());
+  PM.add(createScopedNoAliasAAWrapperPass());
+}
+
+void PassManagerBuilder::populateFunctionPassManager(
+    legacy::FunctionPassManager &FPM) {
+  addExtensionsToPM(EP_EarlyAsPossible, FPM);
+
+  // Add LibraryInfo if we have some.
+  if (LibraryInfo)
+    FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+  if (OptLevel == 0) return;
+
+  addInitialAliasAnalysisPasses(FPM);
+
+  FPM.add(createCFGSimplificationPass());
+  if (UseNewSROA)
+    FPM.add(createSROAPass());
+  else
+    FPM.add(createScalarReplAggregatesPass());
+  FPM.add(createEarlyCSEPass());
+  FPM.add(createLowerExpectIntrinsicPass());
+}
+
+void PassManagerBuilder::populateModulePassManager(
+    legacy::PassManagerBase &MPM) {
+  // Allow forcing function attributes as a debugging and tuning aid.
+  MPM.add(createForceFunctionAttrsLegacyPass());
+
+  // If all optimizations are disabled, just run the always-inline pass and,
+  // if enabled, the function merging pass.
+  if (OptLevel == 0) {
+    if (Inliner) {
+      MPM.add(Inliner);
+      Inliner = nullptr;
+    }
+
+    // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
+    // creates a CGSCC pass manager, but we don't want to add extensions into
+    // that pass manager. To prevent this we insert a no-op module pass to
+    // reset the pass manager to get the same behavior as EP_OptimizerLast in
+    // non-O0 builds. The function merging pass, being a module pass itself,
+    // serves the same purpose here when it is enabled.
+    if (MergeFunctions)
+      MPM.add(createMergeFunctionsPass());
+    else if (!GlobalExtensions->empty() || !Extensions.empty())
+      MPM.add(createBarrierNoopPass());
+
+    addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+    return;
+  }
+
+  // Add LibraryInfo if we have some.
+ if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars + MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + if (!DisableUnitAtATime) + MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + if (UseNewSROA) + MPM.add(createSROAPass()); + else + MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); + MPM.add(createInstructionCombiningPass()); + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + if (EnableLoopInterchange) { + MPM.add(createLoopInterchangePass()); // Interchange loops + MPM.add(createCFGSimplificationPass()); + } + if (!DisableUnrollLoops) + MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + + if (OptLevel > 1) { + if (EnableMLSM) + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). 
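+  // (BDCE deletes instructions whose result bits are never demanded by any
+  // user, e.g. work feeding only bits that a later mask or truncate discards.)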
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass()); + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + if (RerollLoops) + MPM.add(createLoopRerollPass()); + if (!RunSLPAfterLoopVectorization) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); + } + } + + if (LoadCombine) + MPM.add(createLoadCombinePass()); + + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Clean up after everything. + addExtensionsToPM(EP_Peephole, MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + MPM.add(createBarrierNoopPass()); + + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + } + + if (EnableNonLTOGlobalsModRef) + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). 
+    MPM.add(createGlobalsAAWrapperPass());
+
+  if (RunFloat2Int)
+    MPM.add(createFloat2IntPass());
+
+  addExtensionsToPM(EP_VectorizerStart, MPM);
+
+  // Re-rotate loops in all our loop nests. These may have fallen out of
+  // rotated form due to GVN or other transformations, and the vectorizer
+  // relies on the rotated form. Disable header duplication at -Oz.
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+
+  // Distribute loops to allow partial vectorization. I.e. isolate dependences
+  // into a separate loop that would otherwise inhibit vectorization.
+  if (EnableLoopDistribute)
+    MPM.add(createLoopDistributePass());
+
+  MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
+  // Eliminate loads by forwarding stores from the previous iteration to loads
+  // of the current iteration.
+  if (EnableLoopLoadElim)
+    MPM.add(createLoopLoadEliminationPass());
+
+  // FIXME: Because of #pragma vectorize enable, the passes below are always
+  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+  // on -O1 and no #pragma is found). Would be good to have these two passes
+  // as function calls, so that we can only pass them when the vectorizer
+  // changed the code.
+  MPM.add(createInstructionCombiningPass());
+  if (OptLevel > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    MPM.add(createEarlyCSEPass());
+    MPM.add(createCorrelatedValuePropagationPass());
+    MPM.add(createInstructionCombiningPass());
+    MPM.add(createLICMPass());
+    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+    MPM.add(createCFGSimplificationPass());
+    MPM.add(createInstructionCombiningPass());
+  }
+
+  if (RunSLPAfterLoopVectorization) {
+    if (SLPVectorize) {
+      MPM.add(createSLPVectorizerPass());   // Vectorize parallel scalar chains.
+      if (OptLevel > 1 && ExtraVectorizerPasses) {
+        MPM.add(createEarlyCSEPass());
+      }
+    }
+
+    if (BBVectorize) {
+      MPM.add(createBBVectorizePass());
+      MPM.add(createInstructionCombiningPass());
+      addExtensionsToPM(EP_Peephole, MPM);
+      if (OptLevel > 1 && UseGVNAfterVectorization)
+        MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+      else
+        MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
+
+      // BBVectorize may have significantly shortened a loop body; unroll
+      // again.
+      if (!DisableUnrollLoops)
+        MPM.add(createLoopUnrollPass());
+    }
+  }
+
+  addExtensionsToPM(EP_Peephole, MPM);
+  MPM.add(createCFGSimplificationPass());
+  MPM.add(createInstructionCombiningPass());
+
+  if (!DisableUnrollLoops) {
+    MPM.add(createLoopUnrollPass());    // Unroll small loops
+
+    // LoopUnroll may generate some redundancy to clean up.
+    MPM.add(createInstructionCombiningPass());
+
+    // Runtime unrolling will introduce a runtime check in the loop prologue.
+    // If the unrolled loop is an inner loop, then the prologue will be inside
+    // the outer loop. The LICM pass can help to promote the runtime check out
+    // if the checked value is loop invariant.
+    MPM.add(createLICMPass());
+  }
+
+  // After vectorization and unrolling, assume intrinsics may tell us more
+  // about pointer alignments.
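+  // (This pass pattern-matches llvm.assume calls that encode alignment facts
+  // and raises the alignment on the corresponding loads and stores.)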
+ MPM.add(createAlignmentFromAssumptionsPass()); + + if (!DisableUnitAtATime) { + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 1) { + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants + } + } + + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + + addExtensionsToPM(EP_OptimizerLast, MPM); +} + +void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createFunctionAttrsPass()); // Add norecurse if possible. + PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); + + // Inline small functions + bool RunInliner = Inliner; + if (RunInliner) { + PM.add(Inliner); + Inliner = nullptr; + } + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); + PM.add(createJumpThreadingPass()); + + // Break up allocas + if (UseNewSROA) + PM.add(createSROAPass()); + else + PM.add(createScalarReplAggregatesPass()); + + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. + + PM.add(createLICMPass()); // Hoist loop invariants. + if (EnableMLSM) + PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. + PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + + // Nuke dead stores. 
+ PM.add(createDeadStoreEliminationPass()); + + // More loops are countable; try to optimize them. + PM.add(createIndVarSimplifyPass()); + PM.add(createLoopDeletionPass()); + if (EnableLoopInterchange) + PM.add(createLoopInterchangePass()); + + PM.add(createLoopVectorizePass(true, LoopVectorize)); + + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + PM.add(createInstructionCombiningPass()); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + PM.add(createInstructionCombiningPass()); // Clean up again + PM.add(createBitTrackingDCEPass()); + + // More scalar chains could be vectorized due to more alias information + if (RunSLPAfterLoopVectorization) + if (SLPVectorize) + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + // After vectorization, assume intrinsics may tell us more about pointer + // alignments. + PM.add(createAlignmentFromAssumptionsPass()); + + if (LoadCombine) + PM.add(createLoadCombinePass()); + + // Cleanup and simplify the code after the scalar optimizations. + PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); + + PM.add(createJumpThreadingPass()); +} + +void PassManagerBuilder::addLateLTOOptimizationPasses( + legacy::PassManagerBase &PM) { + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + + // Now that we have optimized the program, discard unreachable functions. + PM.add(createGlobalDCEPass()); + + // FIXME: this is profitable (for compiler time) to do at -O0 too, but + // currently it damages debug info. + if (MergeFunctions) + PM.add(createMergeFunctionsPass()); +} + +void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { + if (LibraryInfo) + PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (VerifyInput) + PM.add(createVerifierPass()); + + if (OptLevel > 1) + addLTOOptimizationPasses(PM); + + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + + // Lower bit sets to globals. This pass supports Clang's control flow + // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI + // is enabled. The pass does nothing if CFI is disabled. 
+  PM.add(createLowerBitSetsPass());
+
+  if (OptLevel != 0)
+    addLateLTOOptimizationPasses(PM);
+
+  if (VerifyOutput)
+    PM.add(createVerifierPass());
+}
+
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+  return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+  return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
+  PassManagerBuilder *PMB = new PassManagerBuilder();
+  return wrap(PMB);
+}
+
+void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  delete Builder;
+}
+
+void
+LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
+                                  unsigned OptLevel) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->OptLevel = OptLevel;
+}
+
+void
+LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
+                                   unsigned SizeLevel) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->SizeLevel = SizeLevel;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
+                                            LLVMBool Value) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->DisableUnitAtATime = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+                                            LLVMBool Value) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->DisableUnrollLoops = Value;
+}
+
+void
+LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
+                                                 LLVMBool Value) {
+  // NOTE: The simplify-libcalls pass has been removed.
+}
+
+void
+LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
+                                              unsigned Threshold) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  Builder->Inliner = createFunctionInliningPass(Threshold);
+}
+
+void
+LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
+                                                  LLVMPassManagerRef PM) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
+  Builder->populateFunctionPassManager(*FPM);
+}
+
+void
+LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
+                                                LLVMPassManagerRef PM) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  legacy::PassManagerBase *MPM = unwrap(PM);
+  Builder->populateModulePassManager(*MPM);
+}
+
+void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
+                                                  LLVMPassManagerRef PM,
+                                                  LLVMBool Internalize,
+                                                  LLVMBool RunInliner) {
+  PassManagerBuilder *Builder = unwrap(PMB);
+  legacy::PassManagerBase *LPM = unwrap(PM);
+
+  // A small backwards compatibility hack. populateLTOPassManager used to take
+  // a RunInliner option.
+  if (RunInliner && !Builder->Inliner)
+    Builder->Inliner = createFunctionInliningPass();
+
+  Builder->populateLTOPassManager(*LPM);
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
new file mode 100644
index 0000000..3af4afb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -0,0 +1,273 @@
+//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, turning invoke instructions into calls, iff the callee cannot +// throw an exception, and marking functions 'nounwind' if they cannot throw. +// It implements this as a bottom-up traversal of the call-graph. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "prune-eh" + +STATISTIC(NumRemoved, "Number of invokes removed"); +STATISTIC(NumUnreach, "Number of noreturn calls optimized"); + +namespace { + struct PruneEH : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + PruneEH() : CallGraphSCCPass(ID) { + initializePruneEHPass(*PassRegistry::getPassRegistry()); + } + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(CallGraphSCC &SCC) override; + + bool SimplifyFunction(Function *F); + void DeleteBasicBlock(BasicBlock *BB); + }; +} + +char PruneEH::ID = 0; +INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false) + +Pass *llvm::createPruneEHPass() { return new PruneEH(); } + + +bool PruneEH::runOnSCC(CallGraphSCC &SCC) { + SmallPtrSet<CallGraphNode *, 8> SCCNodes; + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + bool MadeChange = false; + + // Fill SCCNodes with the elements of the SCC. Used for quickly + // looking up whether a given CallGraphNode is in this SCC. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert(*I); + + // First pass, scan all of the functions in the SCC, simplifying them + // according to what we know. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) + MadeChange |= SimplifyFunction(F); + + // Next, check to see if any callees might throw or if there are any external + // functions in this SCC: if so, we cannot prune any functions in this SCC. + // Definitions that are weak and not declared non-throwing might be + // overridden at linktime with something that throws, so assume that. + // If this SCC includes the unwind instruction, we KNOW it throws, so + // obviously the SCC might throw. 
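+  // For example, an SCC whose only calls are to nounwind functions inside the
+  // SCC itself can be marked nounwind, which in turn lets callers' invokes of
+  // it be rewritten into plain calls by SimplifyFunction below.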
+  //
+  bool SCCMightUnwind = false, SCCMightReturn = false;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+       (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+    Function *F = (*I)->getFunction();
+    if (!F) {
+      SCCMightUnwind = true;
+      SCCMightReturn = true;
+    } else if (F->isDeclaration() || F->mayBeOverridden()) {
+      SCCMightUnwind |= !F->doesNotThrow();
+      SCCMightReturn |= !F->doesNotReturn();
+    } else {
+      bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
+      bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+      // Determine if we should scan for InlineAsm in a naked function as it
+      // is the only way to return without a ReturnInst. Only do this for
+      // no-inline functions as functions which may be inlined cannot
+      // meaningfully return via assembly.
+      bool CheckReturnViaAsm = CheckReturn &&
+                               F->hasFnAttribute(Attribute::Naked) &&
+                               F->hasFnAttribute(Attribute::NoInline);
+
+      if (!CheckUnwind && !CheckReturn)
+        continue;
+
+      for (const BasicBlock &BB : *F) {
+        const TerminatorInst *TI = BB.getTerminator();
+        if (CheckUnwind && TI->mayThrow()) {
+          SCCMightUnwind = true;
+        } else if (CheckReturn && isa<ReturnInst>(TI)) {
+          SCCMightReturn = true;
+        }
+
+        for (const Instruction &I : BB) {
+          if ((!CheckUnwind || SCCMightUnwind) &&
+              (!CheckReturnViaAsm || SCCMightReturn))
+            break;
+
+          // Check to see if this function performs an unwind or calls an
+          // unwinding function.
+          if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) {
+            bool InstMightUnwind = true;
+            if (const auto *CI = dyn_cast<CallInst>(&I)) {
+              if (Function *Callee = CI->getCalledFunction()) {
+                CallGraphNode *CalleeNode = CG[Callee];
+                // If the callee is outside our current SCC then we may throw
+                // because it might. If it is inside, do nothing.
+                if (SCCNodes.count(CalleeNode) > 0)
+                  InstMightUnwind = false;
+              }
+            }
+            SCCMightUnwind |= InstMightUnwind;
+          }
+          if (CheckReturnViaAsm && !SCCMightReturn)
+            if (auto ICS = ImmutableCallSite(&I))
+              if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
+                if (IA->hasSideEffects())
+                  SCCMightReturn = true;
+        }
+
+        if (SCCMightUnwind && SCCMightReturn)
+          break;
+      }
+    }
+  }
+
+  // If the SCC doesn't unwind or doesn't return, note this fact.
+  if (!SCCMightUnwind || !SCCMightReturn)
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      Function *F = (*I)->getFunction();
+
+      if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
+        F->addFnAttr(Attribute::NoUnwind);
+        MadeChange = true;
+      }
+
+      if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) {
+        F->addFnAttr(Attribute::NoReturn);
+        MadeChange = true;
+      }
+    }
+
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    // Convert any invokes of non-throwing functions in this node into call
+    // instructions with a branch. This makes the exception blocks dead.
+    if (Function *F = (*I)->getFunction())
+      MadeChange |= SimplifyFunction(F);
+  }
+
+  return MadeChange;
+}
+
+
+// SimplifyFunction - Given information about callees, simplify the specified
+// function if we have invokes to non-unwinding functions or code after calls
+// to no-return functions.
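+// For instance (rough IR sketch), once @callee is known nounwind,
+//   invoke void @callee() to label %ok unwind label %lpad
+// becomes
+//   call void @callee()
+//   br label %ok
+// and %lpad, having lost its last predecessor, is usually deleted.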
+bool PruneEH::SimplifyFunction(Function *F) {
+  bool MadeChange = false;
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+      if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
+        SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+        SmallVector<OperandBundleDef, 1> OpBundles;
+        II->getOperandBundlesAsDefs(OpBundles);
+
+        // Insert a call instruction before the invoke.
+        CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+                                          "", II);
+        Call->takeName(II);
+        Call->setCallingConv(II->getCallingConv());
+        Call->setAttributes(II->getAttributes());
+        Call->setDebugLoc(II->getDebugLoc());
+
+        // Anything that used the value produced by the invoke instruction
+        // now uses the value produced by the call instruction. Note that we
+        // do this even for void functions and calls with no uses so that the
+        // callgraph edge is updated.
+        II->replaceAllUsesWith(Call);
+        BasicBlock *UnwindBlock = II->getUnwindDest();
+        UnwindBlock->removePredecessor(II->getParent());
+
+        // Insert a branch to the normal destination right before the
+        // invoke.
+        BranchInst::Create(II->getNormalDest(), II);
+
+        // Finally, delete the invoke instruction!
+        BB->getInstList().pop_back();
+
+        // If the unwind block is now dead, nuke it.
+        if (pred_empty(UnwindBlock))
+          DeleteBasicBlock(UnwindBlock); // Delete the dead unwind block.
+
+        ++NumRemoved;
+        MadeChange = true;
+      }
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+      if (CallInst *CI = dyn_cast<CallInst>(I++))
+        if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+          // This call calls a function that cannot return. Insert an
+          // unreachable instruction after it and simplify the code. Do this
+          // by splitting the BB, adding the unreachable, then deleting the
+          // new BB.
+          BasicBlock *New = BB->splitBasicBlock(I);
+
+          // Remove the uncond branch and add an unreachable.
+          BB->getInstList().pop_back();
+          new UnreachableInst(BB->getContext(), &*BB);
+
+          DeleteBasicBlock(New);  // Delete the new BB.
+          MadeChange = true;
+          ++NumUnreach;
+          break;
+        }
+  }
+
+  return MadeChange;
+}
+
+/// DeleteBasicBlock - remove the specified basic block from the program,
+/// updating the callgraph to reflect any now-obsolete edges due to calls that
+/// exist in the BB.
+void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+  assert(pred_empty(BB) && "BB is not dead!");
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+  CallGraphNode *CGN = CG[BB->getParent()];
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
+    --I;
+    if (CallInst *CI = dyn_cast<CallInst>(I)) {
+      if (!isa<IntrinsicInst>(I))
+        CGN->removeCallEdgeFor(CI);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+      CGN->removeCallEdgeFor(II);
+    if (!I->use_empty())
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
+  }
+
+  // Get the list of successors of this block.
+ std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + Succs[i]->removePredecessor(BB); + + BB->eraseFromParent(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp new file mode 100644 index 0000000..928d92e --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -0,0 +1,1265 @@ +//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleProfileLoader transformation. This pass +// reads a profile file generated by a sampling profiler (e.g. Linux Perf - +// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the +// profile information in the given profile. +// +// This pass generates branch weight annotations on the IR: +// +// - prof: Represents branch weights. This annotation is added to branches +// to indicate the weights of each edge coming out of the branch. +// The weight of each edge is the weight of the target block for +// that edge. The weight of a block B is computed as the maximum +// number of samples found in B. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <cctype> + +using namespace llvm; +using namespace sampleprof; + +#define DEBUG_TYPE "sample-profile" + +// Command line option to specify the file to read samples from. This is +// mainly used for debugging. 
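+//
+// For example (illustrative invocation):
+//
+//   opt -sample-profile -sample-profile-file=foo.prof foo.bc -o foo.opt.bc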
+static cl::opt<std::string> SampleProfileFile(
+    "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+    cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+    "sample-profile-max-propagate-iterations", cl::init(100),
+    cl::desc("Maximum number of iterations to go through when propagating "
+             "sample block/edge weights through the CFG."));
+static cl::opt<unsigned> SampleProfileRecordCoverage(
+    "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+    cl::desc("Emit a warning if less than N% of records in the input profile "
+             "are matched to the IR."));
+static cl::opt<unsigned> SampleProfileSampleCoverage(
+    "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+    cl::desc("Emit a warning if less than N% of samples in the input profile "
+             "are matched to the IR."));
+static cl::opt<double> SampleProfileHotThreshold(
+    "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
+    cl::desc("Inlined functions that account for more than N% of all samples "
+             "collected in the parent function will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+    "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+    cl::desc("Top-level functions that account for more than N% of all "
+             "samples collected in the profile will be marked as hot for "
+             "the inliner to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+    "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+    cl::desc("Top-level functions that account for less than N% of all "
+             "samples collected in the profile will be marked as cold for "
+             "the inliner to consider."));
+
+namespace {
+typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
+typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
+typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
+typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
+typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
+    BlockEdgeMap;
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
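+///
+/// For example, running with -sample-profile-check-record-coverage=90
+/// (illustrative value) emits a warning for every function in which fewer
+/// than 90% of the profile records could be matched to the IR.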
+class SampleProfileLoader : public ModulePass { +public: + // Class identification, replacement for typeinfo + static char ID; + + SampleProfileLoader(StringRef Name = SampleProfileFile) + : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) { + initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); + } + + bool doInitialization(Module &M) override; + + void dump() { Reader->dump(); } + + const char *getPassName() const override { return "Sample profile pass"; } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + +protected: + bool runOnFunction(Function &F); + unsigned getFunctionLoc(Function &F); + bool emitAnnotations(Function &F); + ErrorOr<uint64_t> getInstWeight(const Instruction &I) const; + ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const; + const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; + const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineHotFunctions(Function &F); + bool emitInlineHints(Function &F); + void printEdgeWeight(raw_ostream &OS, Edge E); + void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); + bool computeBlockWeights(Function &F); + void findEquivalenceClasses(Function &F); + void findEquivalencesFor(BasicBlock *BB1, + SmallVector<BasicBlock *, 8> Descendants, + DominatorTreeBase<BasicBlock> *DomTree); + void propagateWeights(Function &F); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + void buildEdges(Function &F); + bool propagateThroughEdges(Function &F); + void computeDominanceAndLoopInfo(Function &F); + unsigned getOffset(unsigned L, unsigned H) const; + void clearFunctionData(); + + /// \brief Map basic blocks to their computed weights. + /// + /// The weight of a basic block is defined to be the maximum + /// of all the instruction weights in that block. + BlockWeightMap BlockWeights; + + /// \brief Map edges to their computed weights. + /// + /// Edge weights are computed by propagating basic block weights in + /// SampleProfile::propagateWeights. + EdgeWeightMap EdgeWeights; + + /// \brief Set of visited blocks during propagation. + SmallPtrSet<const BasicBlock *, 128> VisitedBlocks; + + /// \brief Set of visited edges during propagation. + SmallSet<Edge, 128> VisitedEdges; + + /// \brief Equivalence classes for block weights. + /// + /// Two blocks BB1 and BB2 are in the same equivalence class if they + /// dominate and post-dominate each other, and they are in the same loop + /// nest. When this happens, the two blocks are guaranteed to execute + /// the same number of times. + EquivalenceClassMap EquivalenceClass; + + /// \brief Dominance, post-dominance and loop information. + std::unique_ptr<DominatorTree> DT; + std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT; + std::unique_ptr<LoopInfo> LI; + + /// \brief Predecessors for each basic block in the CFG. + BlockEdgeMap Predecessors; + + /// \brief Successors for each basic block in the CFG. + BlockEdgeMap Successors; + + /// \brief Profile reader object. + std::unique_ptr<SampleProfileReader> Reader; + + /// \brief Samples collected for the body of this function. + FunctionSamples *Samples; + + /// \brief Name of the profile file to load. 
+  StringRef Filename;
+
+  /// \brief Flag indicating whether the profile input was loaded successfully.
+  bool ProfileIsValid;
+
+  /// \brief Total number of samples collected in this profile.
+  ///
+  /// This is the sum of all the samples collected in all the functions
+  /// executed at runtime.
+  uint64_t TotalCollectedSamples;
+};
+
+class SampleCoverageTracker {
+public:
+  SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+  bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+                       uint32_t Discriminator, uint64_t Samples);
+  unsigned computeCoverage(unsigned Used, unsigned Total) const;
+  unsigned countUsedRecords(const FunctionSamples *FS) const;
+  unsigned countBodyRecords(const FunctionSamples *FS) const;
+  uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+  uint64_t countBodySamples(const FunctionSamples *FS) const;
+  void clear() {
+    SampleCoverage.clear();
+    TotalUsedSamples = 0;
+  }
+
+private:
+  typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+  typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+      FunctionSamplesCoverageMap;
+
+  /// Coverage map for sampling records.
+  ///
+  /// This map keeps a record of sampling records that have been matched to
+  /// an IR instruction. This is used to detect some form of staleness in
+  /// profiles (see flags -sample-profile-check-record-coverage and
+  /// -sample-profile-check-sample-coverage).
+  ///
+  /// Each entry in the map corresponds to a FunctionSamples instance. This is
+  /// another map that counts how many times the sample record at the
+  /// given location has been used.
+  FunctionSamplesCoverageMap SampleCoverage;
+
+  /// Number of samples used from the profile.
+  ///
+  /// When a sampling record is used for the first time, the samples from
+  /// that record are added to this accumulator. Coverage is later computed
+  /// based on the total number of samples available in this function and
+  /// its callsites.
+  ///
+  /// Note that this accumulator tracks samples used from a single function
+  /// and all the inlined callsites. Strictly, we should have a map of counters
+  /// keyed by FunctionSamples pointers, but these stats are cleared after
+  /// every function, so we just need to keep a single counter.
+  uint64_t TotalUsedSamples;
+};
+
+SampleCoverageTracker CoverageTracker;
+
+/// Return true if the given callsite is hot with respect to its caller.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compute the fraction
+/// of samples used by the callsite with respect to the total number of samples
+/// collected in the caller.
+///
+/// If that fraction is larger than the threshold given by
+/// SampleProfileHotThreshold, the callsite will be inlined again.
+bool callsiteIsHot(const FunctionSamples *CallerFS,
+                   const FunctionSamples *CallsiteFS) {
+  if (!CallsiteFS)
+    return false; // The callsite was not inlined in the original binary.
+
+  uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
+  if (ParentTotalSamples == 0)
+    return false; // Avoid division by zero.
+
+  uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+  if (CallsiteTotalSamples == 0)
+    return false; // Callsite is trivially cold.
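+
+  // For example, with the default SampleProfileHotThreshold of 0.1, a
+  // callsite that collected 5 samples inside a caller that collected 1000
+  // samples accounts for 0.5% of the caller's samples and is therefore hot.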
+
+  double PercentSamples =
+      (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+  return PercentSamples >= SampleProfileHotThreshold;
+}
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+                                            uint32_t LineOffset,
+                                            uint32_t Discriminator,
+                                            uint64_t Samples) {
+  LineLocation Loc(LineOffset, Discriminator);
+  unsigned &Count = SampleCoverage[FS][Loc];
+  bool FirstTime = (++Count == 1);
+  if (FirstTime)
+    TotalUsedSamples += Samples;
+  return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+  auto I = SampleCoverage.find(FS);
+
+  // The size of the coverage map for FS represents the number of records
+  // that were marked used at least once.
+  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+  // If there are inlined callsites in this function, count the records used
+  // in the respective bodies. However, do not bother counting callees with 0
+  // total samples; these are callees that were never invoked at runtime.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countUsedRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+  unsigned Count = FS->getBodySamples().size();
+
+  // Only count records in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countBodyRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+  uint64_t Total = 0;
+  for (const auto &I : FS->getBodySamples())
+    Total += I.second.getSamples();
+
+  // Only count samples in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Total += countBodySamples(CalleeSamples);
+  }
+
+  return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+                                                unsigned Total) const {
+  assert(Used <= Total &&
+         "number of used records cannot exceed the total number of records");
+  return Total > 0 ? Used * 100 / Total : 100;
+}
+
+/// Clear all the per-function data used to load samples and propagate weights.
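+///
+/// This is invoked by runOnModule before each function is visited; the
+/// block/edge maps and the global CoverageTracker are reused across
+/// functions, so any state left over from the previous function must be
+/// dropped here.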
+void SampleProfileLoader::clearFunctionData() { + BlockWeights.clear(); + EdgeWeights.clear(); + VisitedBlocks.clear(); + VisitedEdges.clear(); + EquivalenceClass.clear(); + DT = nullptr; + PDT = nullptr; + LI = nullptr; + Predecessors.clear(); + Successors.clear(); + CoverageTracker.clear(); +} + +/// \brief Returns the offset of lineno \p L to head_lineno \p H +/// +/// \param L Lineno +/// \param H Header lineno of the function +/// +/// \returns offset to the header lineno. 16 bits are used to represent offset. +/// We assume that a single function will not exceed 65535 LOC. +unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const { + return (L - H) & 0xffff; +} + +/// \brief Print the weight of edge \p E on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param E Edge to print. +void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) { + OS << "weight[" << E.first->getName() << "->" << E.second->getName() + << "]: " << EdgeWeights[E] << "\n"; +} + +/// \brief Print the equivalence class of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS, + const BasicBlock *BB) { + const BasicBlock *Equiv = EquivalenceClass[BB]; + OS << "equivalence[" << BB->getName() + << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n"; +} + +/// \brief Print the weight of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoader::printBlockWeight(raw_ostream &OS, + const BasicBlock *BB) const { + const auto &I = BlockWeights.find(BB); + uint64_t W = (I == BlockWeights.end() ? 0 : I->second); + OS << "weight[" << BB->getName() << "]: " << W << "\n"; +} + +/// \brief Get the weight for an instruction. +/// +/// The "weight" of an instruction \p Inst is the number of samples +/// collected on that instruction at runtime. To retrieve it, we +/// need to compute the line number of \p Inst relative to the start of its +/// function. We use HeaderLineno to compute the offset. We then +/// look up the samples collected for \p Inst using BodySamples. +/// +/// \param Inst Instruction to query. +/// +/// \returns the weight of \p Inst. +ErrorOr<uint64_t> +SampleProfileLoader::getInstWeight(const Instruction &Inst) const { + DebugLoc DLoc = Inst.getDebugLoc(); + if (!DLoc) + return std::error_code(); + + const FunctionSamples *FS = findFunctionSamples(Inst); + if (!FS) + return std::error_code(); + + const DILocation *DIL = DLoc; + unsigned Lineno = DLoc.getLine(); + unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine(); + + uint32_t LineOffset = getOffset(Lineno, HeaderLineno); + uint32_t Discriminator = DIL->getDiscriminator(); + ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator); + if (R) { + bool FirstMark = + CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); + if (FirstMark) { + const Function *F = Inst.getParent()->getParent(); + LLVMContext &Ctx = F->getContext(); + emitOptimizationRemark( + Ctx, DEBUG_TYPE, *F, DLoc, + Twine("Applied ") + Twine(*R) + " samples from profile (offset: " + + Twine(LineOffset) + + ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")"); + } + DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":" + << Inst << " (line offset: " << Lineno - HeaderLineno << "." 
+                 << DIL->getDiscriminator() << " - weight: " << R.get()
+                 << ")\n");
+  }
+  return R;
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p BB is the maximum weight of all the
+/// instructions in BB.
+///
+/// \param BB The basic block to query.
+///
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+  bool Found = false;
+  uint64_t Weight = 0;
+  for (auto &I : BB->getInstList()) {
+    const ErrorOr<uint64_t> &R = getInstWeight(I);
+    if (R && R.get() >= Weight) {
+      Weight = R.get();
+      Found = true;
+    }
+  }
+  if (Found)
+    return Weight;
+  else
+    return std::error_code();
+}
+
+/// \brief Compute and store the weights of every basic block.
+///
+/// This populates the BlockWeights map by computing
+/// the weights of every basic block in the CFG.
+///
+/// \param F The function to query.
+bool SampleProfileLoader::computeBlockWeights(Function &F) {
+  bool Changed = false;
+  DEBUG(dbgs() << "Block weights\n");
+  for (const auto &BB : F) {
+    ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+    if (Weight) {
+      BlockWeights[&BB] = Weight.get();
+      VisitedBlocks.insert(&BB);
+      Changed = true;
+    }
+    DEBUG(printBlockWeight(dbgs(), &BB));
+  }
+
+  return Changed;
+}
+
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// instance that the call instruction calls into. It contains all the
+/// samples that reside in that inlined instance. We first find the inlined
+/// instance the call instruction itself belongs to, then we traverse its
+/// children to find the callsite with the matching location and callee
+/// function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+  const DILocation *DIL = Inst.getDebugLoc();
+  if (!DIL) {
+    return nullptr;
+  }
+  DISubprogram *SP = DIL->getScope()->getSubprogram();
+  if (!SP)
+    return nullptr;
+
+  Function *CalleeFunc = Inst.getCalledFunction();
+  if (!CalleeFunc) {
+    return nullptr;
+  }
+
+  StringRef CalleeName = CalleeFunc->getName();
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (FS == nullptr)
+    return nullptr;
+
+  return FS->findFunctionSamplesAt(
+      CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+                       DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// that the instruction comes from. We traverse the inline stack of that
+/// instruction, and match it with the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
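+///
+/// For example (hypothetical scenario): if bar was inlined into foo in the
+/// profiled binary, an instruction whose debug location reads "in bar,
+/// inlined at foo:10" resolves to the FunctionSamples node for that inlined
+/// copy of bar nested inside foo's profile, not to a top-level profile
+/// for bar.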
+const FunctionSamples * +SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { + SmallVector<CallsiteLocation, 10> S; + const DILocation *DIL = Inst.getDebugLoc(); + if (!DIL) { + return Samples; + } + StringRef CalleeName; + for (const DILocation *DIL = Inst.getDebugLoc(); DIL; + DIL = DIL->getInlinedAt()) { + DISubprogram *SP = DIL->getScope()->getSubprogram(); + if (!SP) + return nullptr; + if (!CalleeName.empty()) { + S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()), + DIL->getDiscriminator(), CalleeName)); + } + CalleeName = SP->getLinkageName(); + } + if (S.size() == 0) + return Samples; + const FunctionSamples *FS = Samples; + for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { + FS = FS->findFunctionSamplesAt(S[i]); + } + return FS; +} + +/// \brief Emit an inline hint if \p F is globally hot or cold. +/// +/// If \p F consumes a significant fraction of samples (indicated by +/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the +/// inliner to consider the function hot. +/// +/// If \p F consumes a small fraction of samples (indicated by +/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner +/// to consider the function cold. +/// +/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a +/// function globally hot or cold, we should be annotating individual callsites. +/// This is not currently possible, but work on the inliner will eventually +/// provide this ability. See http://reviews.llvm.org/D15003 for details and +/// discussion. +/// +/// \returns True if either attribute was applied to \p F. +bool SampleProfileLoader::emitInlineHints(Function &F) { + if (TotalCollectedSamples == 0) + return false; + + uint64_t FunctionSamples = Samples->getTotalSamples(); + double SamplesPercent = + (double)FunctionSamples / (double)TotalCollectedSamples * 100.0; + + // If the function collected more samples than the hot threshold, mark + // it globally hot. + if (SamplesPercent >= SampleProfileGlobalHotThreshold) { + F.addFnAttr(llvm::Attribute::InlineHint); + std::string Msg; + raw_string_ostream S(Msg); + S << "Applied inline hint to globally hot function '" << F.getName() + << "' with " << format("%.2f", SamplesPercent) + << "% of samples (threshold: " + << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)"; + S.flush(); + emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); + return true; + } + + // If the function collected fewer samples than the cold threshold, mark + // it globally cold. + if (SamplesPercent <= SampleProfileGlobalColdThreshold) { + F.addFnAttr(llvm::Attribute::Cold); + std::string Msg; + raw_string_ostream S(Msg); + S << "Applied cold hint to globally cold function '" << F.getName() + << "' with " << format("%.2f", SamplesPercent) + << "% of samples (threshold: " + << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)"; + S.flush(); + emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); + return true; + } + + return false; +} + +/// \brief Iteratively inline hot callsites of a function. +/// +/// Iteratively traverse all callsites of the function \p F, and find if +/// the corresponding inlined instance exists and is hot in profile. If +/// it is hot enough, inline the callsites and adds new callsites of the +/// callee into the caller. +/// +/// TODO: investigate the possibility of not invoking InlineFunction directly. +/// +/// \param F function to perform iterative inlining. 
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+  bool Changed = false;
+  LLVMContext &Ctx = F.getContext();
+  while (true) {
+    bool LocalChanged = false;
+    SmallVector<CallInst *, 10> CIS;
+    for (auto &BB : F) {
+      for (auto &I : BB.getInstList()) {
+        CallInst *CI = dyn_cast<CallInst>(&I);
+        if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+          CIS.push_back(CI);
+      }
+    }
+    for (auto CI : CIS) {
+      InlineFunctionInfo IFI;
+      Function *CalledFunction = CI->getCalledFunction();
+      DebugLoc DLoc = CI->getDebugLoc();
+      uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+      if (InlineFunction(CI, IFI)) {
+        LocalChanged = true;
+        emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
+                               Twine("inlined hot callee '") +
+                                   CalledFunction->getName() + "' with " +
+                                   Twine(NumSamples) + " samples into '" +
+                                   F.getName() + "'");
+      }
+    }
+    if (LocalChanged) {
+      Changed = true;
+    } else {
+      break;
+    }
+  }
+  return Changed;
+}
+
+/// \brief Find equivalence classes for the given block.
+///
+/// This finds all the blocks that are guaranteed to execute the same
+/// number of times as \p BB1. To do this, it traverses all the
+/// descendants of \p BB1 in the dominator or post-dominator tree.
+///
+/// A block BB2 will be in the same equivalence class as \p BB1 if
+/// the following holds:
+///
+/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
+///    is a descendant of \p BB1 in the dominator tree, then BB2 should
+///    dominate BB1 in the post-dominator tree.
+///
+/// 2- Both BB2 and \p BB1 must be in the same loop.
+///
+/// For every block BB2 that meets those two requirements, we set BB2's
+/// equivalence class to \p BB1.
+///
+/// \param BB1 Block to check.
+/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
+/// \param DomTree Opposite dominator tree. If \p Descendants is filled
+///                with blocks from \p BB1's dominator tree, then
+///                this is the post-dominator tree, and vice versa.
+void SampleProfileLoader::findEquivalencesFor(
+    BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+    DominatorTreeBase<BasicBlock> *DomTree) {
+  const BasicBlock *EC = EquivalenceClass[BB1];
+  uint64_t Weight = BlockWeights[EC];
+  for (const auto *BB2 : Descendants) {
+    bool IsDomParent = DomTree->dominates(BB2, BB1);
+    bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
+    if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+      EquivalenceClass[BB2] = EC;
+
+      // If BB2 is heavier than BB1, raise the weight of BB1's equivalence
+      // class to BB2's weight.
+      //
+      // Note that we don't worry about the opposite situation here
+      // (when BB2 is lighter than BB1). We will deal with this
+      // during the propagation phase. Right now, we just want to
+      // make sure that BB1 has the largest weight of all the
+      // members of its equivalence set.
+      Weight = std::max(Weight, BlockWeights[BB2]);
+    }
+  }
+  BlockWeights[EC] = Weight;
+}
+
+/// \brief Find equivalence classes.
+///
+/// Since samples may be missing from blocks, we can fill in the gaps by setting
+/// the weights of all the blocks in the same equivalence class to the same
+/// weight. To compute the concept of equivalence, we use dominance and loop
+/// information. Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// \param F The function to query.
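+///
+/// For example, in a simple if-then-else diamond with no loops, the entry
+/// block dominates the join block and the join block post-dominates the
+/// entry block, so the two end up in the same equivalence class. Neither
+/// arm of the diamond joins that class, since the branch may bypass it.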
+void SampleProfileLoader::findEquivalenceClasses(Function &F) { + SmallVector<BasicBlock *, 8> DominatedBBs; + DEBUG(dbgs() << "\nBlock equivalence classes\n"); + // Find equivalence sets based on dominance and post-dominance information. + for (auto &BB : F) { + BasicBlock *BB1 = &BB; + + // Compute BB1's equivalence class once. + if (EquivalenceClass.count(BB1)) { + DEBUG(printBlockEquivalence(dbgs(), BB1)); + continue; + } + + // By default, blocks are in their own equivalence class. + EquivalenceClass[BB1] = BB1; + + // Traverse all the blocks dominated by BB1. We are looking for + // every basic block BB2 such that: + // + // 1- BB1 dominates BB2. + // 2- BB2 post-dominates BB1. + // 3- BB1 and BB2 are in the same loop nest. + // + // If all those conditions hold, it means that BB2 is executed + // as many times as BB1, so they are placed in the same equivalence + // class by making BB2's equivalence class be BB1. + DominatedBBs.clear(); + DT->getDescendants(BB1, DominatedBBs); + findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + + DEBUG(printBlockEquivalence(dbgs(), BB1)); + } + + // Assign weights to equivalence classes. + // + // All the basic blocks in the same equivalence class will execute + // the same number of times. Since we know that the head block in + // each equivalence class has the largest weight, assign that weight + // to all the blocks in that equivalence class. + DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n"); + for (auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EquivBB = EquivalenceClass[BB]; + if (BB != EquivBB) + BlockWeights[BB] = BlockWeights[EquivBB]; + DEBUG(printBlockWeight(dbgs(), BB)); + } +} + +/// \brief Visit the given edge to decide if it has a valid weight. +/// +/// If \p E has not been visited before, we copy to \p UnknownEdge +/// and increment the count of unknown edges. +/// +/// \param E Edge to visit. +/// \param NumUnknownEdges Current number of unknown edges. +/// \param UnknownEdge Set if E has not been visited before. +/// +/// \returns E's weight, if known. Otherwise, return 0. +uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges, + Edge *UnknownEdge) { + if (!VisitedEdges.count(E)) { + (*NumUnknownEdges)++; + *UnknownEdge = E; + return 0; + } + + return EdgeWeights[E]; +} + +/// \brief Propagate weights through incoming/outgoing edges. +/// +/// If the weight of a basic block is known, and there is only one edge +/// with an unknown weight, we can calculate the weight of that edge. +/// +/// Similarly, if all the edges have a known count, we can calculate the +/// count of the basic block, if needed. +/// +/// \param F Function to process. +/// +/// \returns True if new weights were assigned to edges or blocks. +bool SampleProfileLoader::propagateThroughEdges(Function &F) { + bool Changed = false; + DEBUG(dbgs() << "\nPropagation through edges\n"); + for (const auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EC = EquivalenceClass[BB]; + + // Visit all the predecessor and successor edges to determine + // which ones have a weight assigned already. Note that it doesn't + // matter that we only keep track of a single unknown edge. The + // only case we are interested in handling is when only a single + // edge is unknown (see setEdgeOrBlockWeight). + for (unsigned i = 0; i < 2; i++) { + uint64_t TotalWeight = 0; + unsigned NumUnknownEdges = 0; + Edge UnknownEdge, SelfReferentialEdge; + + if (i == 0) { + // First, visit all predecessor edges. 
+        for (auto *Pred : Predecessors[BB]) {
+          Edge E = std::make_pair(Pred, BB);
+          TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+          if (E.first == E.second)
+            SelfReferentialEdge = E;
+        }
+      } else {
+        // On the second round, visit all successor edges.
+        for (auto *Succ : Successors[BB]) {
+          Edge E = std::make_pair(BB, Succ);
+          TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+        }
+      }
+
+      // After visiting all the edges, there are three cases that we
+      // can handle immediately:
+      //
+      // - All the edge weights are known (i.e., NumUnknownEdges == 0).
+      //   In this case, we simply check that the sum of all the edges
+      //   is the same as BB's weight. If not, we change BB's weight
+      //   to match. Additionally, if BB had not been visited before,
+      //   we mark it visited.
+      //
+      // - Only one edge is unknown and BB has already been visited.
+      //   In this case, we can compute the weight of the edge by
+      //   subtracting the sum of all the known edge weights from the
+      //   total block weight. If the known edges weigh more than BB,
+      //   then the weight of the last remaining edge is set to zero.
+      //
+      // - There exists a self-referential edge and the weight of BB is
+      //   known. In this case, this edge's weight can be derived from
+      //   BB's weight. We add up all the other known edges and set the
+      //   weight on the self-referential edge as we did in the previous
+      //   case.
+      //
+      // In any other case, we must continue iterating. Eventually,
+      // all edges will get a weight, or iteration will stop when
+      // it reaches SampleProfileMaxPropagateIterations.
+      if (NumUnknownEdges <= 1) {
+        uint64_t &BBWeight = BlockWeights[EC];
+        if (NumUnknownEdges == 0) {
+          // If we already know the weight of all edges, the weight of the
+          // basic block can be computed. It should be no larger than the sum
+          // of all edge weights.
+          if (TotalWeight > BBWeight) {
+            BBWeight = TotalWeight;
+            Changed = true;
+            DEBUG(dbgs() << "All edge weights for " << BB->getName()
+                         << " known. Set weight for block: ";
+                  printBlockWeight(dbgs(), BB););
+          }
+          if (VisitedBlocks.insert(EC).second)
+            Changed = true;
+        } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
+          // If there is a single unknown edge and the block has been
+          // visited, then we can compute E's weight.
+          if (BBWeight >= TotalWeight)
+            EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
+          else
+            EdgeWeights[UnknownEdge] = 0;
+          VisitedEdges.insert(UnknownEdge);
+          Changed = true;
+          DEBUG(dbgs() << "Set weight for edge: ";
+                printEdgeWeight(dbgs(), UnknownEdge));
+        }
+      } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+        uint64_t &BBWeight = BlockWeights[BB];
+        // We have a self-referential edge and the weight of BB is known.
+        if (BBWeight >= TotalWeight)
+          EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
+        else
+          EdgeWeights[SelfReferentialEdge] = 0;
+        VisitedEdges.insert(SelfReferentialEdge);
+        Changed = true;
+        DEBUG(dbgs() << "Set self-referential edge weight to: ";
+              printEdgeWeight(dbgs(), SelfReferentialEdge));
+      }
+    }
+  }
+
+  return Changed;
+}
+
+/// \brief Build in/out edge lists for each basic block in the CFG.
+///
+/// We are interested in unique edges. If a block B1 has multiple
+/// edges to another block B2, we only add a single B1->B2 edge.
+void SampleProfileLoader::buildEdges(Function &F) {
+  for (auto &BI : F) {
+    BasicBlock *B1 = &BI;
+
+    // Add predecessors for B1.
+ SmallPtrSet<BasicBlock *, 16> Visited; + if (!Predecessors[B1].empty()) + llvm_unreachable("Found a stale predecessors list in a basic block."); + for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) { + BasicBlock *B2 = *PI; + if (Visited.insert(B2).second) + Predecessors[B1].push_back(B2); + } + + // Add successors for B1. + Visited.clear(); + if (!Successors[B1].empty()) + llvm_unreachable("Found a stale successors list in a basic block."); + for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) { + BasicBlock *B2 = *SI; + if (Visited.insert(B2).second) + Successors[B1].push_back(B2); + } + } +} + +/// \brief Propagate weights into edges +/// +/// The following rules are applied to every block BB in the CFG: +/// +/// - If BB has a single predecessor/successor, then the weight +/// of that edge is the weight of the block. +/// +/// - If all incoming or outgoing edges are known except one, and the +/// weight of the block is already known, the weight of the unknown +/// edge will be the weight of the block minus the sum of all the known +/// edges. If the sum of all the known edges is larger than BB's weight, +/// we set the unknown edge weight to zero. +/// +/// - If there is a self-referential edge, and the weight of the block is +/// known, the weight for that edge is set to the weight of the block +/// minus the weight of the other incoming edges to that block (if +/// known). +void SampleProfileLoader::propagateWeights(Function &F) { + bool Changed = true; + unsigned I = 0; + + // Add an entry count to the function using the samples gathered + // at the function entry. + F.setEntryCount(Samples->getHeadSamples()); + + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); + + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F); + } + + // Generate MD_prof metadata for every branch instruction using the + // edge weights computed during propagation. + DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n"); + LLVMContext &Ctx = F.getContext(); + MDBuilder MDB(Ctx); + for (auto &BI : F) { + BasicBlock *BB = &BI; + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 1) + continue; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + continue; + + DEBUG(dbgs() << "\nGetting weights for branch at line " + << TI->getDebugLoc().getLine() << ".\n"); + SmallVector<uint32_t, 4> Weights; + uint32_t MaxWeight = 0; + DebugLoc MaxDestLoc; + for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { + BasicBlock *Succ = TI->getSuccessor(I); + Edge E = std::make_pair(BB, Succ); + uint64_t Weight = EdgeWeights[E]; + DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E)); + // Use uint32_t saturated arithmetic to adjust the incoming weights, + // if needed. Sample counts in profiles are 64-bit unsigned values, + // but internally branch weights are expressed as 32-bit values. 
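+      // For example, a raw edge weight of 2^35 is clamped to 4294967295
+      // (UINT32_MAX) before it is handed to MDBuilder::createBranchWeights.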
+      if (Weight > std::numeric_limits<uint32_t>::max()) {
+        DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+        Weight = std::numeric_limits<uint32_t>::max();
+      }
+      Weights.push_back(static_cast<uint32_t>(Weight));
+      if (Weight != 0) {
+        if (Weight > MaxWeight) {
+          MaxWeight = Weight;
+          MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+        }
+      }
+    }
+
+    // Only set weights if there is at least one non-zero weight.
+    // In any other case, let the analyzer set weights.
+    if (MaxWeight > 0) {
+      DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
+      TI->setMetadata(llvm::LLVMContext::MD_prof,
+                      MDB.createBranchWeights(Weights));
+      DebugLoc BranchLoc = TI->getDebugLoc();
+      emitOptimizationRemark(
+          Ctx, DEBUG_TYPE, F, MaxDestLoc,
+          Twine("most popular destination for conditional branches at ") +
+              ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
+                                   Twine(BranchLoc.getLine()) + ":" +
+                                   Twine(BranchLoc.getCol()))
+                           : Twine("<UNKNOWN LOCATION>")));
+    } else {
+      DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+    }
+  }
+}
+
+/// \brief Get the line number for the function header.
+///
+/// This looks up function \p F in the current compilation unit and
+/// retrieves the line number where the function is defined. This line
+/// becomes offset 0 for all the samples read from the profile file;
+/// every line number in the profile is relative to it.
+///
+/// \param F Function object to query.
+///
+/// \returns the line number where \p F is defined. If it returns 0,
+/// it means that there is no debug information available for \p F.
+unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
+  if (DISubprogram *S = getDISubprogram(&F))
+    return S->getLine();
+
+  // If the start of \p F is missing, emit a diagnostic to inform the user
+  // about the missed opportunity.
+  F.getContext().diagnose(DiagnosticInfoSampleProfile(
+      "No debug information found in function " + F.getName() +
+          ": Function profile not used",
+      DS_Warning));
+  return 0;
+}
+
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+  DT.reset(new DominatorTree);
+  DT->recalculate(F);
+
+  PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+  PDT->recalculate(F);
+
+  LI.reset(new LoopInfo);
+  LI->analyze(*DT);
+}
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// Branch weights are computed out of instruction samples using a
+/// propagation heuristic. Propagation proceeds in 3 phases:
+///
+/// 1- Assignment of block weights. All the basic blocks in the function
+///    are initially assigned the same weight as their most frequently
+///    executed instruction.
+///
+/// 2- Creation of equivalence classes. Since samples may be missing from
+///    blocks, we can fill in the gaps by setting the weights of all the
+///    blocks in the same equivalence class to the same weight. To compute
+///    the concept of equivalence, we use dominance and loop information.
+///    Two blocks B1 and B2 are in the same equivalence class if B1
+///    dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// 3- Propagation of block weights into edges. This uses a simple
+///    propagation heuristic. The following rules are applied to every
+///    block BB in the CFG:
+///
+///    - If BB has a single predecessor/successor, then the weight
+///      of that edge is the weight of the block.
+///
+///    - If all the edges are known except one, and the weight of the
+///      block is already known, the weight of the unknown edge will
+///      be the weight of the block minus the sum of all the known
+///      edges. If the sum of all the known edges is larger than BB's
+///      weight, we set the unknown edge weight to zero.
+///
+///    - If there is a self-referential edge, and the weight of the block is
+///      known, the weight for that edge is set to the weight of the block
+///      minus the weight of the other incoming edges to that block (if
+///      known).
+///
+/// Since this propagation is not guaranteed to terminate for every CFG, we
+/// only allow it to proceed for a limited number of iterations (controlled
+/// by -sample-profile-max-propagate-iterations).
+///
+/// FIXME: Try to replace this propagation heuristic with a scheme
+/// that is guaranteed to terminate. A work-list approach similar to
+/// the standard value propagation algorithm used by SSA-CCP might
+/// work here.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on BB using the computed values for each of its branches.
+///
+/// \param F The function to query.
+///
+/// \returns true if \p F was modified. Returns false, otherwise.
+bool SampleProfileLoader::emitAnnotations(Function &F) {
+  bool Changed = false;
+
+  if (getFunctionLoc(F) == 0)
+    return false;
+
+  DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
+               << ": " << getFunctionLoc(F) << "\n");
+
+  Changed |= emitInlineHints(F);
+
+  Changed |= inlineHotFunctions(F);
+
+  // Compute basic block weights.
+  Changed |= computeBlockWeights(F);
+
+  if (Changed) {
+    // Compute dominance and loop info needed for propagation.
+    computeDominanceAndLoopInfo(F);
+
+    // Find equivalence classes.
+    findEquivalenceClasses(F);
+
+    // Propagate weights to all edges.
+    propagateWeights(F);
+  }
+
+  // If coverage checking was requested, compute it now.
+  if (SampleProfileRecordCoverage) {
+    unsigned Used = CoverageTracker.countUsedRecords(Samples);
+    unsigned Total = CoverageTracker.countBodyRecords(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileRecordCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) +
+              " available profile records (" + Twine(Coverage) +
+              "%) were applied",
+          DS_Warning));
+    }
+  }
+
+  if (SampleProfileSampleCoverage) {
+    uint64_t Used = CoverageTracker.getTotalUsedSamples();
+    uint64_t Total = CoverageTracker.countBodySamples(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileSampleCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) +
+              " available profile samples (" + Twine(Coverage) +
+              "%) were applied",
+          DS_Warning));
+    }
+  }
+  return Changed;
+}
+
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
+                      "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
+INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
+                    "Sample Profile loader", false, false)
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+  auto &Ctx = M.getContext();
+  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    std::string Msg = "Could not open profile: " + EC.message();
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+    return false;
+  }
+  Reader = std::move(ReaderOrErr.get());
+  ProfileIsValid = (Reader->read() == sampleprof_error::success);
+  return true;
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass() {
+  return new SampleProfileLoader(SampleProfileFile);
+}
+
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+  return new SampleProfileLoader(Name);
+}
+
+bool SampleProfileLoader::runOnModule(Module &M) {
+  if (!ProfileIsValid)
+    return false;
+
+  // Compute the total number of samples collected in this profile.
+  for (const auto &I : Reader->getProfiles())
+    TotalCollectedSamples += I.second.getTotalSamples();
+
+  bool retval = false;
+  for (auto &F : M)
+    if (!F.isDeclaration()) {
+      clearFunctionData();
+      retval |= runOnFunction(F);
+    }
+  return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+  Samples = Reader->getSamplesFor(F);
+  if (!Samples->empty())
+    return emitAnnotations(F);
+  return false;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 0000000..c94cc7c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,84 @@
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loops over all of the functions in the input module, looking for
+// dead declarations, and removes them. Dead declarations are declarations of
+// functions for which no implementation is available (i.e., declarations for
+// unused library functions).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "strip-dead-prototypes"
+
+STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
+
+static bool stripDeadPrototypes(Module &M) {
+  bool MadeChange = false;
+
+  // Erase dead function prototypes.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
+    Function *F = &*I++;
+    // Function must be a prototype and unused.
+    if (F->isDeclaration() && F->use_empty()) {
+      F->eraseFromParent();
+      ++NumDeadPrototypes;
+      MadeChange = true;
+    }
+  }
+
+  // Erase dead global var prototypes.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ) {
+    GlobalVariable *GV = &*I++;
+    // Global must be a prototype and unused. Erasing it is a change, so
+    // record that fact as well.
+    if (GV->isDeclaration() && GV->use_empty()) {
+      GV->eraseFromParent();
+      MadeChange = true;
+    }
+  }
+
+  // Return an indication of whether we changed anything or not.
+ return MadeChange; +} + +PreservedAnalyses StripDeadPrototypesPass::run(Module &M) { + if (stripDeadPrototypes(M)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +namespace { + +class StripDeadPrototypesLegacyPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + StripDeadPrototypesLegacyPass() : ModulePass(ID) { + initializeStripDeadPrototypesLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override { + return stripDeadPrototypes(M); + } +}; + +} // end anonymous namespace + +char StripDeadPrototypesLegacyPass::ID = 0; +INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes", + "Strip Unused Function Prototypes", false, false) + +ModulePass *llvm::createStripDeadPrototypesPass() { + return new StripDeadPrototypesLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp new file mode 100644 index 0000000..46f352f --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -0,0 +1,363 @@ +//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripSymbols transformation implements code stripping. Specifically, it +// can delete: +// +// * names for virtual registers +// * symbols for internal globals and functions +// * debug information +// +// Note that this transformation makes code much less readable, so it should +// only be used in situations where the 'strip' utility would be used, such as +// reducing code size or making it harder to reverse engineer code. 
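+//
+// For example, the passes defined here can be driven from opt (illustrative
+// invocations):
+//
+//   opt -strip foo.bc -o foo.stripped.bc
+//   opt -strip-dead-debug-info foo.bc -o foo.slim.bc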
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +namespace { + class StripSymbols : public ModulePass { + bool OnlyDebugInfo; + public: + static char ID; // Pass identification, replacement for typeid + explicit StripSymbols(bool ODI = false) + : ModulePass(ID), OnlyDebugInfo(ODI) { + initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; + + class StripNonDebugSymbols : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripNonDebugSymbols() + : ModulePass(ID) { + initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; + + class StripDebugDeclare : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDebugDeclare() + : ModulePass(ID) { + initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; + + class StripDeadDebugInfo : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDeadDebugInfo() + : ModulePass(ID) { + initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + }; +} + +char StripSymbols::ID = 0; +INITIALIZE_PASS(StripSymbols, "strip", + "Strip all symbols from a module", false, false) + +ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { + return new StripSymbols(OnlyDebugInfo); +} + +char StripNonDebugSymbols::ID = 0; +INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", + "Strip all symbols, except dbg symbols, from a module", + false, false) + +ModulePass *llvm::createStripNonDebugSymbolsPass() { + return new StripNonDebugSymbols(); +} + +char StripDebugDeclare::ID = 0; +INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", + "Strip all llvm.dbg.declare intrinsics", false, false) + +ModulePass *llvm::createStripDebugDeclarePass() { + return new StripDebugDeclare(); +} + +char StripDeadDebugInfo::ID = 0; +INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", + "Strip debug info for unused symbols", false, false) + +ModulePass *llvm::createStripDeadDebugInfoPass() { + return new StripDeadDebugInfo(); +} + +/// OnlyUsedBy - Return true if V is only used by Usr. 
+static bool OnlyUsedBy(Value *V, Value *Usr) { + for (User *U : V->users()) + if (U != Usr) + return false; + + return true; +} + +static void RemoveDeadConstant(Constant *C) { + assert(C->use_empty() && "Constant is not dead!"); + SmallPtrSet<Constant*, 4> Operands; + for (Value *Op : C->operands()) + if (OnlyUsedBy(Op, C)) + Operands.insert(cast<Constant>(Op)); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { + if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals. + GV->eraseFromParent(); + } + else if (!isa<Function>(C)) + if (isa<CompositeType>(C->getType())) + C->destroyConstant(); + + // If the constant referenced anything, see if we can delete it as well. + for (Constant *O : Operands) + RemoveDeadConstant(O); +} + +// Strip the symbol table of its names. +// +static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { + for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) { + Value *V = VI->getValue(); + ++VI; + if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) { + if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg")) + // Set name to "", removing from symbol table! + V->setName(""); + } + } +} + +// Strip any named types of their names. +static void StripTypeNames(Module &M, bool PreserveDbgInfo) { + TypeFinder StructTypes; + StructTypes.run(M, false); + + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isLiteral() || STy->getName().empty()) continue; + + if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) + continue; + + STy->setName(""); + } +} + +/// Find values that are marked as llvm.used. +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSetImpl<const GlobalValue*> &UsedValues) { + if (!LLVMUsed) return; + UsedValues.insert(LLVMUsed); + + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + +/// StripSymbolNames - Strip symbol names. +static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { + + SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; + findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues); + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) + I->setName(""); // Internal symbols can't participate in linkage + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) + I->setName(""); // Internal symbols can't participate in linkage + StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); + } + + // Remove all names from types. 
+  StripTypeNames(M, PreserveDbgInfo);
+
+  return true;
+}
+
+bool StripSymbols::runOnModule(Module &M) {
+  bool Changed = false;
+  Changed |= StripDebugInfo(M);
+  if (!OnlyDebugInfo)
+    Changed |= StripSymbolNames(M, false);
+  return Changed;
+}
+
+bool StripNonDebugSymbols::runOnModule(Module &M) {
+  return StripSymbolNames(M, true);
+}
+
+bool StripDebugDeclare::runOnModule(Module &M) {
+
+  Function *Declare = M.getFunction("llvm.dbg.declare");
+  std::vector<Constant*> DeadConstants;
+
+  if (Declare) {
+    while (!Declare->use_empty()) {
+      CallInst *CI = cast<CallInst>(Declare->user_back());
+      Value *Arg1 = CI->getArgOperand(0);
+      Value *Arg2 = CI->getArgOperand(1);
+      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
+      CI->eraseFromParent();
+      if (Arg1->use_empty()) {
+        if (Constant *C = dyn_cast<Constant>(Arg1))
+          DeadConstants.push_back(C);
+        else
+          RecursivelyDeleteTriviallyDeadInstructions(Arg1);
+      }
+      if (Arg2->use_empty())
+        if (Constant *C = dyn_cast<Constant>(Arg2))
+          DeadConstants.push_back(C);
+    }
+    Declare->eraseFromParent();
+  }
+
+  while (!DeadConstants.empty()) {
+    Constant *C = DeadConstants.back();
+    DeadConstants.pop_back();
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+      if (GV->hasLocalLinkage())
+        RemoveDeadConstant(GV);
+    } else
+      RemoveDeadConstant(C);
+  }
+
+  return true;
+}
+
+/// Remove any debug info for global variables/functions in the given module
+/// for which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. This is
+/// designed in such a way that debug info for a symbol is preserved even if
+/// the symbol is optimized away by the optimizer. This special pass removes
+/// debug info for such symbols.
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+  bool Changed = false;
+
+  LLVMContext &C = M.getContext();
+
+  // Find all the debug info in M. This is actually overkill in terms of what
+  // we want to do, but we want to try and be as resilient as possible in the
+  // face of potential debug info changes by using the formal interfaces given
+  // to us as much as possible.
+  DebugInfoFinder F;
+  F.processModule(M);
+
+  // For each compile unit, find the live set of global variables/functions and
+  // replace the current list of potentially dead global variables/functions
+  // with the live list.
+  SmallVector<Metadata *, 64> LiveGlobalVariables;
+  SmallVector<Metadata *, 64> LiveSubprograms;
+  DenseSet<const MDNode *> VisitedSet;
+
+  std::set<DISubprogram *> LiveSPs;
+  for (Function &F : M) {
+    if (DISubprogram *SP = F.getSubprogram())
+      LiveSPs.insert(SP);
+  }
+
+  for (DICompileUnit *DIC : F.compile_units()) {
+    // Create our live subprogram list.
+    bool SubprogramChange = false;
+    for (DISubprogram *DISP : DIC->getSubprograms()) {
+      // Make sure we visit each subprogram only once.
+      if (!VisitedSet.insert(DISP).second)
+        continue;
+
+      // If the function referenced by DISP is not null, the function is live.
+      if (LiveSPs.count(DISP))
+        LiveSubprograms.push_back(DISP);
+      else
+        SubprogramChange = true;
+    }
+
+    // Create our live global variable list.
+    bool GlobalVariableChange = false;
+    for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
+      // Make sure we visit each global variable only once.
+      if (!VisitedSet.insert(DIG).second)
+        continue;
+
+      // If the global variable referenced by DIG is not null, the global
+      // variable is live.
+      if (DIG->getVariable())
+        LiveGlobalVariables.push_back(DIG);
+      else
+        GlobalVariableChange = true;
+    }
+
+    // If we found dead subprograms or global variables, replace the current
+    // subprogram list/global variable list with our new live subprogram/global
+    // variable list.
+    if (SubprogramChange) {
+      DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
+      Changed = true;
+    }
+
+    if (GlobalVariableChange) {
+      DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
+      Changed = true;
+    }
+
+    // Reset lists for the next iteration.
+    LiveSubprograms.clear();
+    LiveGlobalVariables.clear();
+  }
+
+  return Changed;
+}