Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp  585
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp  395
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp  673
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp  124
-rw-r--r--  contrib/llvm/lib/Analysis/AssumptionCache.cpp  140
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp  1611
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp  213
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp  769
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp  685
-rw-r--r--  contrib/llvm/lib/Analysis/CFG.cpp  236
-rw-r--r--  contrib/llvm/lib/Analysis/CFGPrinter.cpp  165
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp  1119
-rw-r--r--  contrib/llvm/lib/Analysis/CGSCCPassManager.cpp  72
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraph.cpp  306
-rw-r--r--  contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp  632
-rw-r--r--  contrib/llvm/lib/Analysis/CallPrinter.cpp  92
-rw-r--r--  contrib/llvm/lib/Analysis/CaptureTracking.cpp  315
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp  184
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp  1834
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp  533
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp  141
-rw-r--r--  contrib/llvm/lib/Analysis/DemandedBits.cpp  392
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp  4015
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp  320
-rw-r--r--  contrib/llvm/lib/Analysis/DomPrinter.cpp  254
-rw-r--r--  contrib/llvm/lib/Analysis/DominanceFrontier.cpp  57
-rw-r--r--  contrib/llvm/lib/Analysis/EHPersonalities.cpp  106
-rw-r--r--  contrib/llvm/lib/Analysis/GlobalsModRef.cpp  987
-rw-r--r--  contrib/llvm/lib/Analysis/IVUsers.cpp  373
-rw-r--r--  contrib/llvm/lib/Analysis/InlineCost.cpp  1425
-rw-r--r--  contrib/llvm/lib/Analysis/InstCount.cpp  88
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp  4181
-rw-r--r--  contrib/llvm/lib/Analysis/Interval.cpp  58
-rw-r--r--  contrib/llvm/lib/Analysis/IntervalPartition.cpp  114
-rw-r--r--  contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp  95
-rw-r--r--  contrib/llvm/lib/Analysis/LazyCallGraph.cpp  727
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp  1425
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp  720
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp  287
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp  1861
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp  778
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp  347
-rw-r--r--  contrib/llvm/lib/Analysis/MemDepPrinter.cpp  168
-rw-r--r--  contrib/llvm/lib/Analysis/MemDerefPrinter.cpp  78
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp  790
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp  1756
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryLocation.cpp  174
-rw-r--r--  contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp  126
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp  170
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp  28
-rw-r--r--  contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp  675
-rw-r--r--  contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp  85
-rw-r--r--  contrib/llvm/lib/Analysis/PHITransAddr.cpp  442
-rw-r--r--  contrib/llvm/lib/Analysis/PostDominators.cpp  50
-rw-r--r--  contrib/llvm/lib/Analysis/PtrUseVisitor.cpp  35
-rw-r--r--  contrib/llvm/lib/Analysis/RegionInfo.cpp  182
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPass.cpp  282
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPrinter.cpp  267
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp  9741
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp  148
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp  2026
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp  254
-rw-r--r--  contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp  212
-rw-r--r--  contrib/llvm/lib/Analysis/SparsePropagation.cpp  347
-rw-r--r--  contrib/llvm/lib/Analysis/StratifiedSets.h  692
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp  634
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp  406
-rw-r--r--  contrib/llvm/lib/Analysis/Trace.cpp  52
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp  622
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp  4218
-rw-r--r--  contrib/llvm/lib/Analysis/VectorUtils.cpp  567
71 files changed, 54661 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 0000000..35f2e97
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,585 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface, which is the
+// common interface used by all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified. This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation. Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
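A rough usage sketch of the interface described above, assuming an AAResults &AAR and two pointers P1/P2 with known access sizes S1/S2 are already in scope (all of these names are illustrative, not part of this file):

    MemoryLocation LocA(P1, S1);
    MemoryLocation LocB(P2, S2);
    switch (AAR.alias(LocA, LocB)) {
    case NoAlias:      /* accesses are provably disjoint */     break;
    case MustAlias:    /* both start at the same address */     break;
    case PartialAlias: /* they overlap but are not identical */ break;
    case MayAlias:     /* no implementation could do better */  break;
    }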
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+/// Allow disabling BasicAA from the AA results. This is particularly useful
+/// when testing to isolate a single AA implementation.
+static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
+ cl::init(false));
+
+AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) {
+ for (auto &AA : AAs)
+ AA->setAAResults(this);
+}
+
+AAResults &AAResults::operator=(AAResults &&Arg) {
+ AAs = std::move(Arg.AAs);
+ for (auto &AA : AAs)
+ AA->setAAResults(this);
+ return *this;
+}
+
+AAResults::~AAResults() {
+// FIXME: It would be nice to at least clear out the pointers back to this
+// aggregation here, but we end up with non-nesting lifetimes in the legacy
+// pass manager that prevent this from working. In the legacy pass manager
+// we'll end up with dangling references here in some cases.
+#if 0
+ for (auto &AA : AAs)
+ AA->setAAResults(nullptr);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ for (const auto &AA : AAs) {
+ auto Result = AA->alias(LocA, LocB);
+ if (Result != MayAlias)
+ return Result;
+ }
+ return MayAlias;
+}
+
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ for (const auto &AA : AAs)
+ if (AA->pointsToConstantMemory(Loc, OrLocal))
+ return true;
+
+ return false;
+}
+
+ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
+}
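The bitwise AND in these chaining methods is the meet on the mod/ref lattice: with the usual encoding (MRI_NoModRef = 0, MRI_Ref and MRI_Mod as single bits, MRI_ModRef as their union), intersecting two answers can only clear bits, so each additional AA can only sharpen the result. A small sketch under that assumption:

    // One AA answers "may read and write", another proves "only reads";
    // the combined answer keeps only the common bits, i.e. "only reads".
    ModRefInfo A = MRI_ModRef;               // read | write
    ModRefInfo B = MRI_Ref;                  // read only
    ModRefInfo Combined = ModRefInfo(A & B); // == MRI_Ref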
+
+ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ // We may have two calls
+ if (auto CS = ImmutableCallSite(I)) {
+ // Check if the two calls modify the same memory
+ return getModRefInfo(Call, CS);
+ } else {
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const MemoryLocation DefLoc = MemoryLocation::get(I);
+ if (getModRefInfo(Call, DefLoc) != MRI_NoModRef)
+ return MRI_ModRef;
+ }
+ return MRI_NoModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
+}
+
+ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ ModRefInfo Result = MRI_ModRef;
+
+ for (const auto &AA : AAs) {
+ Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == MRI_NoModRef)
+ return Result;
+ }
+
+ return Result;
+}
+
+FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
+
+ for (const auto &AA : AAs) {
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == FMRB_DoesNotAccessMemory)
+ return Result;
+ }
+
+ return Result;
+}
+
+FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) {
+ FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
+
+ for (const auto &AA : AAs) {
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F));
+
+ // Early-exit the moment we reach the bottom of the lattice.
+ if (Result == FMRB_DoesNotAccessMemory)
+ return Result;
+ }
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper method implementation
+//===----------------------------------------------------------------------===//
+
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
+ const MemoryLocation &Loc) {
+ // Be conservative in the face of volatile/atomic.
+ if (!L->isUnordered())
+ return MRI_ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc))
+ return MRI_NoModRef;
+
+ // Otherwise, a load just reads.
+ return MRI_Ref;
+}
+
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+ const MemoryLocation &Loc) {
+ // Be conservative in the face of volatile/atomic.
+ if (!S->isUnordered())
+ return MRI_ModRef;
+
+ if (Loc.Ptr) {
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(MemoryLocation::get(S), Loc))
+ return MRI_NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a store just writes.
+ return MRI_Mod;
+}
+
+ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
+ const MemoryLocation &Loc) {
+
+ if (Loc.Ptr) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(MemoryLocation::get(V), Loc))
+ return MRI_NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a va_arg reads and writes.
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
+ const MemoryLocation &Loc) {
+ if (Loc.Ptr) {
+ // If the pointer is a pointer to constant memory,
+ // then it could not have been modified by this catchpad.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a catchpad reads and writes.
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
+ const MemoryLocation &Loc) {
+ if (Loc.Ptr) {
+ // If the pointer is a pointer to constant memory,
+ // then it could not have been modified by this catchpad.
+ if (pointsToConstantMemory(Loc))
+ return MRI_NoModRef;
+ }
+
+ // Otherwise, a catchret reads and writes.
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc) {
+ // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
+ if (CX->getSuccessOrdering() > Monotonic)
+ return MRI_ModRef;
+
+ // If the cmpxchg address does not alias the location, it does not access it.
+ if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
+ return MRI_NoModRef;
+
+ return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc) {
+ // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
+ if (RMW->getOrdering() > Monotonic)
+ return MRI_ModRef;
+
+ // If the atomicrmw address does not alias the location, it does not access it.
+ if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
+ return MRI_NoModRef;
+
+ return MRI_ModRef;
+}
+
+/// \brief Return information about whether a particular call site modifies
+/// or reads the specified memory location \p MemLoc before instruction \p I
+/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
+/// instruction-ordering queries inside the BasicBlock containing \p I.
+/// FIXME: this is really just shoring-up a deficiency in alias analysis.
+/// BasicAA isn't willing to spend linear time determining whether an alloca
+/// was captured before or after this particular call, while we are. However,
+/// with a smarter AA in place, this test is just wasting compile time.
+ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
+ const MemoryLocation &MemLoc,
+ DominatorTree *DT,
+ OrderedBasicBlock *OBB) {
+ if (!DT)
+ return MRI_ModRef;
+
+ const Value *Object =
+ GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
+ if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
+ isa<Constant>(Object))
+ return MRI_ModRef;
+
+ ImmutableCallSite CS(I);
+ if (!CS.getInstruction() || CS.getInstruction() == Object)
+ return MRI_ModRef;
+
+ if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
+ /* StoreCaptures */ true, I, DT,
+ /* include Object */ true,
+ /* OrderedBasicBlock */ OBB))
+ return MRI_ModRef;
+
+ unsigned ArgNo = 0;
+ ModRefInfo R = MRI_NoModRef;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (isNoAlias(MemoryLocation(*CI), MemoryLocation(Object)))
+ continue;
+ if (CS.doesNotAccessMemory(ArgNo))
+ continue;
+ if (CS.onlyReadsMemory(ArgNo)) {
+ R = MRI_Ref;
+ continue;
+ }
+ return MRI_ModRef;
+ }
+ return R;
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the location Loc.
+///
+bool AAResults::canBasicBlockModify(const BasicBlock &BB,
+ const MemoryLocation &Loc) {
+ return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);
+}
+
+/// canInstructionRangeModRef - Return true if it is possible for the
+/// execution of the specified instructions to mod/ref (according to the
+/// mode) the location Loc. The instructions to consider are all
+/// of the instructions in the range of [I1,I2] INCLUSIVE.
+/// I1 and I2 must be in the same basic block.
+bool AAResults::canInstructionRangeModRef(const Instruction &I1,
+ const Instruction &I2,
+ const MemoryLocation &Loc,
+ const ModRefInfo Mode) {
+ assert(I1.getParent() == I2.getParent() &&
+ "Instructions not in same basic block!");
+ BasicBlock::const_iterator I = I1.getIterator();
+ BasicBlock::const_iterator E = I2.getIterator();
+ ++E; // Convert from inclusive to exclusive range.
+
+ for (; I != E; ++I) // Check every instruction in range
+ if (getModRefInfo(&*I, Loc) & Mode)
+ return true;
+ return false;
+}
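A hedged usage sketch of these range helpers, assuming a BasicBlock &BB, a MemoryLocation Loc, and an AAResults &AAR are in scope:

    if (!AAR.canBasicBlockModify(BB, Loc)) {
      // Nothing in BB can write Loc, so a load from Loc could be moved
      // across the whole block (the caller must still handle control flow
      // and ordering constraints itself).
    }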
+
+// Provide a definition for the root virtual destructor.
+AAResults::Concept::~Concept() {}
+
+namespace {
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+ typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT;
+
+ CallbackT CB;
+
+ static char ID;
+
+ ExternalAAWrapperPass() : ImmutablePass(ID) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+ explicit ExternalAAWrapperPass(CallbackT CB)
+ : ImmutablePass(ID), CB(std::move(CB)) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+}
+
+char ExternalAAWrapperPass::ID = 0;
+INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
+ false, true)
+
+ImmutablePass *
+llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) {
+ return new ExternalAAWrapperPass(std::move(Callback));
+}
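A sketch of how this hook is intended to be used from a legacy pipeline, assuming the legacy PassManager headers and the createExternalAAWrapperPass declaration from llvm/Analysis/AliasAnalysis.h are available; MyExternalResult is a purely hypothetical, externally owned AAResult:

    legacy::PassManager PM;
    PM.add(createExternalAAWrapperPass(
        [](Pass &P, Function &F, AAResults &AAR) {
          // Hypothetical external result; it must outlive AAR.
          // AAR.addAAResult(MyExternalResult);
        }));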
+
+AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) {
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char AAResultsWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa",
+ "Function Alias Analysis Results", false, true)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass)
+INITIALIZE_PASS_END(AAResultsWrapperPass, "aa",
+ "Function Alias Analysis Results", false, true)
+
+FunctionPass *llvm::createAAResultsWrapperPass() {
+ return new AAResultsWrapperPass();
+}
+
+/// Run the wrapper pass to rebuild an aggregation over known AA passes.
+///
+/// This is the legacy pass manager's interface to the new-style AA results
+/// aggregation object. Because this is somewhat shoe-horned into the legacy
+/// pass manager, we hard code all the specific alias analyses available into
+/// it. While the particular set enabled is configured via commandline flags,
+/// adding a new alias analysis to LLVM will require adding support for it to
+/// this list.
+bool AAResultsWrapperPass::runOnFunction(Function &F) {
+ // NB! This *must* be reset before adding new AA results to the new
+ // AAResults object because in the legacy pass manager, each instance
+ // of these will refer to the *same* immutable analyses, registering and
+ // unregistering themselves with them. We need to carefully tear down the
+ // previous object first, in this case replacing it with an empty one, before
+ // registering new results.
+ AAR.reset(new AAResults());
+
+ // BasicAA is always available for function analyses. Also, we add it first
+ // so that it can trump TBAA results when it proves MustAlias.
+ // FIXME: TBAA should have an explicit mode to support this and then we
+ // should reconsider the ordering here.
+ if (!DisableBasicAA)
+ AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult());
+
+ // Populate the results with the currently available AAs.
+ if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass =
+ getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>())
+ AAR->addAAResult(WrapperPass->getResult());
+
+ // If available, run an external AA providing callback over the results as
+ // well.
+ if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
+ if (WrapperPass->CB)
+ WrapperPass->CB(*this, F, *AAR);
+
+ // Analyses don't mutate the IR, so return false.
+ return false;
+}
+
+void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicAAWrapperPass>();
+
+ // We also need to mark all the alias analysis passes we will potentially
+ // probe in runOnFunction as used here to ensure the legacy pass manager
+ // preserves them. This hard coding of lists of alias analyses is specific to
+ // the legacy pass manager.
+ AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>();
+ AU.addUsedIfAvailable<TypeBasedAAWrapperPass>();
+ AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>();
+ AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
+ AU.addUsedIfAvailable<SCEVAAWrapperPass>();
+ AU.addUsedIfAvailable<CFLAAWrapperPass>();
+}
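For comparison, a downstream legacy pass consumes this aggregation the same way the evaluator in the next file does. A minimal sketch (the pass name is illustrative, and the usual registration boilerplate is omitted):

    struct UsesAA : FunctionPass {
      static char ID;
      UsesAA() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AAResultsWrapperPass>();
        AU.setPreservesAll();
      }
      bool runOnFunction(Function &F) override {
        AAResults &AAR = getAnalysis<AAResultsWrapperPass>().getAAResults();
        (void)AAR; // ... issue alias()/getModRefInfo() queries here ...
        return false;
      }
    };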
+
+AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
+ BasicAAResult &BAR) {
+ AAResults AAR;
+
+ // Add in our explicitly constructed BasicAA results.
+ if (!DisableBasicAA)
+ AAR.addAAResult(BAR);
+
+ // Populate the results with the other currently available AAs.
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+
+ return AAR;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+ if (auto CS = ImmutableCallSite(V))
+ return CS.paramHasAttr(0, Attribute::NoAlias);
+ return false;
+}
+
+/// isNoAliasArgument - Return true if this is an argument with the noalias
+/// attribute.
+bool llvm::isNoAliasArgument(const Value *V) {
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr();
+ return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+/// Global Variables and Functions (but not Global Aliases)
+/// Allocas and Mallocs
+/// ByVal and NoAlias Arguments
+/// NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+ if (isa<AllocaInst>(V))
+ return true;
+ if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
+ return true;
+ if (isNoAliasCall(V))
+ return true;
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasNoAliasAttr() || A->hasByValAttr();
+ return false;
+}
+
+/// isIdentifiedFunctionLocal - Return true if V is unambiguously identified
+/// at the function-level. Different IdentifiedFunctionLocals can't alias.
+/// Further, an IdentifiedFunctionLocal cannot alias with any function
+/// arguments other than itself, which is not necessarily true for
+/// IdentifiedObjects.
+bool llvm::isIdentifiedFunctionLocal(const Value *V) {
+ return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
+}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 0000000..12917b6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,395 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// Basically, for each function in the program, it simply queries to see how the
+// alias analysis implementation answers alias queries between each pair of
+// pointers in the function.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
+
+namespace {
+ class AAEval : public FunctionPass {
+ unsigned NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount;
+ unsigned NoModRefCount, ModCount, RefCount, ModRefCount;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AAEval() : FunctionPass(ID) {
+ initializeAAEvalPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M) override {
+ NoAliasCount = MayAliasCount = PartialAliasCount = MustAliasCount = 0;
+ NoModRefCount = ModCount = RefCount = ModRefCount = 0;
+
+ if (PrintAll) {
+ PrintNoAlias = PrintMayAlias = true;
+ PrintPartialAlias = PrintMustAlias = true;
+ PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+ }
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+ };
+}
+
+char AAEval::ID = 0;
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
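A minimal sketch of scheduling the evaluator programmatically (the same effect as running the aa-eval pass through opt); it assumes a Module *M already exists and the legacy PassManager headers are available:

    legacy::PassManager PM;
    PM.add(createAAEvalPass());
    PM.run(*M); // doFinalization() prints the summary report to errs().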
+
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ std::string o1, o2;
+ {
+ raw_string_ostream os1(o1), os2(o2);
+ V1->printAsOperand(os1, true, M);
+ V2->printAsOperand(os2, true, M);
+ }
+
+ if (o2 < o1)
+ std::swap(o1, o2);
+ errs() << " " << Msg << ":\t"
+ << o1 << ", "
+ << o2 << "\n";
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": Ptr: ";
+ Ptr->printAsOperand(errs(), true, M);
+ errs() << "\t<->" << *I << '\n';
+ }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
+static inline void
+PrintLoadStoreResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *V1
+ << " <-> " << *V2 << '\n';
+ }
+}
+
+static inline bool isInterestingPointer(Value *V) {
+ return V->getType()->isPointerTy()
+ && !isa<ConstantPointerNull>(V);
+}
+
+bool AAEval::runOnFunction(Function &F) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ SetVector<Value *> Pointers;
+ SmallSetVector<CallSite, 16> CallSites;
+ SetVector<Value *> Loads;
+ SetVector<Value *> Stores;
+
+ for (auto &I : F.args())
+ if (I.getType()->isPointerTy()) // Add all pointer arguments.
+ Pointers.insert(&I);
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->getType()->isPointerTy()) // Add all pointer instructions.
+ Pointers.insert(&*I);
+ if (EvalAAMD && isa<LoadInst>(&*I))
+ Loads.insert(&*I);
+ if (EvalAAMD && isa<StoreInst>(&*I))
+ Stores.insert(&*I);
+ Instruction &Inst = *I;
+ if (auto CS = CallSite(&Inst)) {
+ Value *Callee = CS.getCalledValue();
+ // Skip actual functions for direct function calls.
+ if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+ Pointers.insert(Callee);
+ // Consider formals.
+ for (Use &DataOp : CS.data_ops())
+ if (isInterestingPointer(DataOp))
+ Pointers.insert(DataOp);
+ CallSites.insert(CS);
+ } else {
+ // Consider all operands.
+ for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+ OI != OE; ++OI)
+ if (isInterestingPointer(*OI))
+ Pointers.insert(*OI);
+ }
+ }
+
+ if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
+ PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+ errs() << "Function: " << F.getName() << ": " << Pointers.size()
+ << " pointers, " << CallSites.size() << " call sites\n";
+
+  // Iterate over the worklist and run the full (n^2)/2 disambiguations.
+ for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+ I1 != E; ++I1) {
+ uint64_t I1Size = MemoryLocation::UnknownSize;
+ Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+ if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
+
+ for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+ uint64_t I2Size = MemoryLocation::UnknownSize;
+      Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
+
+ switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+ case NoAlias:
+ PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+ ++NoAliasCount;
+ break;
+ case MayAlias:
+ PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+ ++MayAliasCount;
+ break;
+ case PartialAlias:
+ PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAliasCount;
+ break;
+ case MustAlias:
+ PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+ ++MustAliasCount;
+ break;
+ }
+ }
+ }
+
+ if (EvalAAMD) {
+    // Iterate over all pairs of (load, store).
+ for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end();
+ I2 != E2; ++I2) {
+ switch (AA.alias(MemoryLocation::get(cast<LoadInst>(*I1)),
+ MemoryLocation::get(cast<StoreInst>(*I2)))) {
+ case NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAliasCount;
+ break;
+ case MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAliasCount;
+ break;
+ case PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAliasCount;
+ break;
+ case MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAliasCount;
+ break;
+ }
+ }
+ }
+
+    // Iterate over all pairs of (store, store).
+ for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
+ switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
+ MemoryLocation::get(cast<StoreInst>(*I2)))) {
+ case NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAliasCount;
+ break;
+ case MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAliasCount;
+ break;
+ case PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAliasCount;
+ break;
+ case MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAliasCount;
+ break;
+ }
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls and values
+ for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
+ Instruction *I = C->getInstruction();
+
+ for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+ V != Ve; ++V) {
+ uint64_t Size = MemoryLocation::UnknownSize;
+ Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+ if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
+
+ switch (AA.getModRefInfo(*C, *V, Size)) {
+ case MRI_NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+ ++NoModRefCount;
+ break;
+ case MRI_Mod:
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
+ ++ModCount;
+ break;
+ case MRI_Ref:
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
+ ++RefCount;
+ break;
+ case MRI_ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
+ ++ModRefCount;
+ break;
+ }
+ }
+ }
+
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
+ for (auto D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case MRI_NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRefCount;
+ break;
+ case MRI_Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++ModCount;
+ break;
+ case MRI_Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++RefCount;
+ break;
+ case MRI_ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRefCount;
+ break;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+ errs() << "(" << Num*100ULL/Sum << "."
+ << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
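Worked example of the integer arithmetic above: for Num = 123 and Sum = 456, Num*100/Sum = 26 and (Num*1000/Sum) % 10 = 269 % 10 = 9, so the function prints "(26.9%)", giving one decimal digit of precision without any floating point.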
+
+bool AAEval::doFinalization(Module &M) {
+ unsigned AliasSum =
+ NoAliasCount + MayAliasCount + PartialAliasCount + MustAliasCount;
+ errs() << "===== Alias Analysis Evaluator Report =====\n";
+ if (AliasSum == 0) {
+ errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
+ } else {
+ errs() << " " << AliasSum << " Total Alias Queries Performed\n";
+ errs() << " " << NoAliasCount << " no alias responses ";
+ PrintPercent(NoAliasCount, AliasSum);
+ errs() << " " << MayAliasCount << " may alias responses ";
+ PrintPercent(MayAliasCount, AliasSum);
+ errs() << " " << PartialAliasCount << " partial alias responses ";
+ PrintPercent(PartialAliasCount, AliasSum);
+ errs() << " " << MustAliasCount << " must alias responses ";
+ PrintPercent(MustAliasCount, AliasSum);
+ errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
+ << NoAliasCount * 100 / AliasSum << "%/"
+ << MayAliasCount * 100 / AliasSum << "%/"
+ << PartialAliasCount * 100 / AliasSum << "%/"
+ << MustAliasCount * 100 / AliasSum << "%\n";
+ }
+
+ // Display the summary for mod/ref analysis
+ unsigned ModRefSum = NoModRefCount + ModCount + RefCount + ModRefCount;
+ if (ModRefSum == 0) {
+ errs() << " Alias Analysis Mod/Ref Evaluator Summary: no "
+ "mod/ref!\n";
+ } else {
+ errs() << " " << ModRefSum << " Total ModRef Queries Performed\n";
+ errs() << " " << NoModRefCount << " no mod/ref responses ";
+ PrintPercent(NoModRefCount, ModRefSum);
+ errs() << " " << ModCount << " mod responses ";
+ PrintPercent(ModCount, ModRefSum);
+ errs() << " " << RefCount << " ref responses ";
+ PrintPercent(RefCount, ModRefSum);
+ errs() << " " << ModRefCount << " mod & ref responses ";
+ PrintPercent(ModRefCount, ModRefSum);
+ errs() << " Alias Analysis Evaluator Mod/Ref Summary: "
+ << NoModRefCount * 100 / ModRefSum << "%/"
+ << ModCount * 100 / ModRefSum << "%/" << RefCount * 100 / ModRefSum
+ << "%/" << ModRefCount * 100 / ModRefSum << "%\n";
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 0000000..3094049
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,673 @@
+//===- AliasSetTracker.cpp - Alias Sets Tracker implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
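A rough sketch of typical client usage, assuming an AAResults &AAR and a Function &F are in scope (the AliasSetPrinter pass at the bottom of this file does essentially the same thing):

    AliasSetTracker AST(AAR);
    for (BasicBlock &BB : F)
      AST.add(BB);                    // Partition every memory reference in F.
    for (const AliasSet &AS : AST) {
      if (AS.isForwardingAliasSet())
        continue;                     // Skip sets merged into another set.
      bool AllMustAlias = AS.isMustAlias();
      (void)AllMustAlias;             // ... drive e.g. promotion decisions ...
    }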
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+ assert(!AS.Forward && "Alias set is already forwarding!");
+ assert(!Forward && "This set is a forwarding set!!");
+
+ // Update the alias and access types of this set...
+ Access |= AS.Access;
+ Alias |= AS.Alias;
+ Volatile |= AS.Volatile;
+
+ if (Alias == SetMustAlias) {
+ // Check that these two merged sets really are must aliases. Since both
+ // used to be must-alias sets, we can just check any pointer from each set
+ // for aliasing.
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ PointerRec *L = getSomePointer();
+ PointerRec *R = AS.getSomePointer();
+
+ // If the pointers are not a must-alias pair, this set becomes a may alias.
+ if (AA.alias(MemoryLocation(L->getValue(), L->getSize(), L->getAAInfo()),
+ MemoryLocation(R->getValue(), R->getSize(), R->getAAInfo())) !=
+ MustAlias)
+ Alias = SetMayAlias;
+ }
+
+ bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
+ if (UnknownInsts.empty()) { // Merge call sites...
+ if (ASHadUnknownInsts) {
+ std::swap(UnknownInsts, AS.UnknownInsts);
+ addRef();
+ }
+ } else if (ASHadUnknownInsts) {
+ UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
+ AS.UnknownInsts.clear();
+ }
+
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
+
+ // Merge the list of constituent pointers...
+ if (AS.PtrList) {
+ *PtrListEnd = AS.PtrList;
+ AS.PtrList->setPrevInList(PtrListEnd);
+ PtrListEnd = AS.PtrListEnd;
+
+ AS.PtrList = nullptr;
+ AS.PtrListEnd = &AS.PtrList;
+ assert(*AS.PtrListEnd == nullptr && "End of list is not null?");
+ }
+ if (ASHadUnknownInsts)
+ AS.dropRef(AST);
+}
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+ if (AliasSet *Fwd = AS->Forward) {
+ Fwd->dropRef(*this);
+ AS->Forward = nullptr;
+ }
+ AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+ assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+ AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+ uint64_t Size, const AAMDNodes &AAInfo,
+ bool KnownMustAlias) {
+ assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+ // Check to see if we have to downgrade to _may_ alias.
+ if (isMustAlias() && !KnownMustAlias)
+ if (PointerRec *P = getSomePointer()) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasResult Result =
+ AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
+ MemoryLocation(Entry.getValue(), Size, AAInfo));
+ if (Result != MustAlias)
+ Alias = SetMayAlias;
+ else // First entry of must alias must have maximum size!
+ P->updateSizeAndAAInfo(Size, AAInfo);
+ assert(Result != NoAlias && "Cannot be part of must set!");
+ }
+
+ Entry.setAliasSet(this);
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
+
+ // Add it to the end of the list...
+ assert(*PtrListEnd == nullptr && "End of list is not null?");
+ *PtrListEnd = &Entry;
+ PtrListEnd = Entry.setPrevInList(PtrListEnd);
+ assert(*PtrListEnd == nullptr && "End of list is not null?");
+ addRef(); // Entry points to alias set.
+}
+
+void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+ if (UnknownInsts.empty())
+ addRef();
+ UnknownInsts.emplace_back(I);
+
+ if (!I->mayWriteToMemory()) {
+ Alias = SetMayAlias;
+ Access |= RefAccess;
+ return;
+ }
+
+ // FIXME: This should use mod/ref information to make this not suck so bad
+ Alias = SetMayAlias;
+ Access = ModRefAccess;
+}
+
+/// aliasesPointer - Return true if the specified pointer "may" (or must)
+/// alias one of the members in the set.
+///
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasAnalysis &AA) const {
+ if (Alias == SetMustAlias) {
+ assert(UnknownInsts.empty() && "Illegal must alias set!");
+
+ // If this is a set of MustAliases, only check to see if the pointer aliases
+ // SOME value in the set.
+ PointerRec *SomePtr = getSomePointer();
+ assert(SomePtr && "Empty must-alias set??");
+ return AA.alias(MemoryLocation(SomePtr->getValue(), SomePtr->getSize(),
+ SomePtr->getAAInfo()),
+ MemoryLocation(Ptr, Size, AAInfo));
+ }
+
+  // If this is a may-alias set, we have to check all of the pointers in the
+  // set to see whether the given pointer aliases any of them.
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.alias(MemoryLocation(Ptr, Size, AAInfo),
+ MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
+ return true;
+
+ // Check the unknown instructions...
+ if (!UnknownInsts.empty()) {
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
+ if (AA.getModRefInfo(UnknownInsts[i],
+ MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef)
+ return true;
+ }
+
+ return false;
+}
+
+bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
+ AliasAnalysis &AA) const {
+ if (!Inst->mayReadOrWriteMemory())
+ return false;
+
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+ ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
+ if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
+ AA.getModRefInfo(C2, C1) != MRI_NoModRef)
+ return true;
+ }
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(),
+ I.getAAInfo())) != MRI_NoModRef)
+ return true;
+
+ return false;
+}
+
+void AliasSetTracker::clear() {
+ // Delete all the PointerRec entries.
+ for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
+ I != E; ++I)
+ I->second->eraseFromList();
+
+ PointerMap.clear();
+
+ // The alias sets should all be clear now.
+ AliasSets.clear();
+}
+
+
+/// findAliasSetForPointer - Given a pointer, find the one alias set to put the
+/// instruction referring to the pointer into. If there are multiple alias sets
+/// that may alias the pointer, merge them together and return the unified set.
+///
+AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
+ uint64_t Size,
+ const AAMDNodes &AAInfo) {
+ AliasSet *FoundSet = nullptr;
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
+
+ if (!FoundSet) { // If this is the first alias set ptr can go into.
+ FoundSet = &*Cur; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
+ }
+ }
+
+ return FoundSet;
+}
+
+/// containsPointer - Return true if the specified location is represented by
+/// this alias set, false otherwise. This does not modify the AST object or
+/// alias sets.
+bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
+ const AAMDNodes &AAInfo) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA))
+ return true;
+ return false;
+}
+
+bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
+ return true;
+ return false;
+}
+
+AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
+ AliasSet *FoundSet = nullptr;
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
+ continue;
+ if (!FoundSet) // If this is the first alias set ptr can go into.
+ FoundSet = &*Cur; // Remember it.
+ else if (!Cur->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
+ }
+ return FoundSet;
+}
+
+
+
+
+/// getAliasSetForPointer - Return the alias set that the specified pointer
+/// lives in.
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ bool *New) {
+ AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+
+ // Check to see if the pointer is already known.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
+ // Return the set!
+ return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, AAInfo)) {
+ // Add it to the alias set it aliases.
+ AS->addPointer(*this, Entry, Size, AAInfo);
+ return *AS;
+ }
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
+ return AliasSets.back();
+}
+
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ bool NewPtr;
+ addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::add(LoadInst *LI) {
+ if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
+ AliasSet::AccessLattice Access = AliasSet::RefAccess;
+ bool NewPtr;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ AliasSet &AS = addPointer(LI->getOperand(0),
+ DL.getTypeStoreSize(LI->getType()),
+ AAInfo, Access, NewPtr);
+ if (LI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(StoreInst *SI) {
+ if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+
+ AAMDNodes AAInfo;
+ SI->getAAMetadata(AAInfo);
+
+ AliasSet::AccessLattice Access = AliasSet::ModAccess;
+ bool NewPtr;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ Value *Val = SI->getOperand(0);
+ AliasSet &AS = addPointer(SI->getOperand(1),
+ DL.getTypeStoreSize(Val->getType()),
+ AAInfo, Access, NewPtr);
+ if (SI->isVolatile()) AS.setVolatile();
+ return NewPtr;
+}
+
+bool AliasSetTracker::add(VAArgInst *VAAI) {
+ AAMDNodes AAInfo;
+ VAAI->getAAMetadata(AAInfo);
+
+ bool NewPtr;
+ addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
+ AliasSet::ModRefAccess, NewPtr);
+ return NewPtr;
+}
+
+
+bool AliasSetTracker::addUnknown(Instruction *Inst) {
+ if (isa<DbgInfoIntrinsic>(Inst))
+ return true; // Ignore DbgInfo Intrinsics.
+ if (!Inst->mayReadOrWriteMemory())
+ return true; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForUnknownInst(Inst);
+ if (AS) {
+ AS->addUnknownInst(Inst, AA);
+ return false;
+ }
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addUnknownInst(Inst, AA);
+ return true;
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+ // Dispatch to one of the other add methods.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return add(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return add(SI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return add(VAAI);
+ return addUnknown(I);
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+ for (auto &I : BB)
+ add(&I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+ assert(&AA == &AST.AA &&
+ "Merging AliasSetTracker objects with different Alias Analyses!");
+
+ // Loop over all of the alias sets in AST, adding the pointers contained
+ // therein into the current alias sets. This can cause alias sets to be
+ // merged together in the current AST.
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
+ add(AS.UnknownInsts[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ ASI.getAAInfo(),
+ (AliasSet::AccessLattice)AS.Access, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
+ }
+ }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+ // Drop all call sites.
+ if (!AS.UnknownInsts.empty())
+ AS.dropRef(*this);
+ AS.UnknownInsts.clear();
+
+ // Clear the alias set.
+ unsigned NumRefs = 0;
+ while (!AS.empty()) {
+ AliasSet::PointerRec *P = AS.PtrList;
+
+ Value *ValToRemove = P->getValue();
+
+ // Unlink and delete entry from the list of values.
+ P->eraseFromList();
+
+ // Remember how many references need to be dropped.
+ ++NumRefs;
+
+ // Finally, remove the entry.
+ PointerMap.erase(ValToRemove);
+ }
+
+ // Stop using the alias set, removing it.
+ AS.RefCount -= NumRefs;
+ if (AS.RefCount == 0)
+ AS.removeFromTracker(*this);
+}
+
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, AAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI->getType());
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, AAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ AAMDNodes AAInfo;
+ SI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, AAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AAMDNodes AAInfo;
+ VAAI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+ MemoryLocation::UnknownSize, AAInfo);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::removeUnknown(Instruction *I) {
+ if (!I->mayReadOrWriteMemory())
+ return false; // doesn't alias anything
+
+ AliasSet *AS = findAliasSetForUnknownInst(I);
+ if (!AS) return false;
+ remove(*AS);
+ return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+ // Dispatch to one of the other remove methods...
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return remove(LI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return remove(SI);
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ return remove(VAAI);
+ return removeUnknown(I);
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely. It should be used when an instruction is deleted
+// from the program to update the AST. If you don't use this, you will have
+// dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+ // If this is a call instruction, remove the callsite from the appropriate
+ // AliasSet (if present).
+ if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
+ if (Inst->mayReadOrWriteMemory()) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (!Cur->Forward)
+ Cur->removeUnknownInst(*this, Inst);
+ }
+ }
+ }
+
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find_as(PtrVal);
+ if (I == PointerMap.end()) return; // Noop
+
+ // If we found one, remove the pointer from the alias set it is in.
+ AliasSet::PointerRec *PtrValEnt = I->second;
+ AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+ // Unlink and delete from the list of values.
+ PtrValEnt->eraseFromList();
+
+ // Stop using the alias set.
+ AS->dropRef(*this);
+
+ PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value. Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+ // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find_as(From);
+ if (I == PointerMap.end())
+ return; // Noop
+ assert(I->second->hasAliasSet() && "Dead entry?");
+
+ AliasSet::PointerRec &Entry = getEntryFor(To);
+ if (Entry.hasAliasSet()) return; // Already in the tracker!
+
+ // Add it to the alias set it aliases...
+ I = PointerMap.find_as(From);
+ AliasSet *AS = I->second->getAliasSet(*this);
+ AS->addPointer(*this, Entry, I->second->getSize(),
+ I->second->getAAInfo(),
+ true);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// AliasSet/AliasSetTracker Printing Support
+//===----------------------------------------------------------------------===//
+
+void AliasSet::print(raw_ostream &OS) const {
+ OS << " AliasSet[" << (const void*)this << ", " << RefCount << "] ";
+ OS << (Alias == SetMustAlias ? "must" : "may") << " alias, ";
+ switch (Access) {
+ case NoAccess: OS << "No access "; break;
+ case RefAccess: OS << "Ref "; break;
+ case ModAccess: OS << "Mod "; break;
+ case ModRefAccess: OS << "Mod/Ref "; break;
+ default: llvm_unreachable("Bad value for Access!");
+ }
+ if (isVolatile()) OS << "[volatile] ";
+ if (Forward)
+ OS << " forwarding to " << (void*)Forward;
+
+
+ if (!empty()) {
+ OS << "Pointers: ";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I != begin()) OS << ", ";
+ I.getPointer()->printAsOperand(OS << "(");
+ OS << ", " << I.getSize() << ")";
+ }
+ }
+ if (!UnknownInsts.empty()) {
+ OS << "\n " << UnknownInsts.size() << " Unknown instructions: ";
+ for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+ if (i) OS << ", ";
+ UnknownInsts[i]->printAsOperand(OS);
+ }
+ }
+ OS << "\n";
+}
+
+void AliasSetTracker::print(raw_ostream &OS) const {
+ OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
+ << PointerMap.size() << " pointer values.\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ I->print(OS);
+ OS << "\n";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void AliasSet::dump() const { print(dbgs()); }
+void AliasSetTracker::dump() const { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// ASTCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void AliasSetTracker::ASTCallbackVH::deleted() {
+ assert(AST && "ASTCallbackVH called with a null AliasSetTracker!");
+ AST->deleteValue(getValPtr());
+  // 'this' now dangles!
+}
+
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+ AST->copyValue(getValPtr(), V);
+}
+
+AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
+ : CallbackVH(V), AST(ast) {}
+
+AliasSetTracker::ASTCallbackVH &
+AliasSetTracker::ASTCallbackVH::operator=(Value *V) {
+ return *this = ASTCallbackVH(V, AST);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasSetPrinter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class AliasSetPrinter : public FunctionPass {
+ AliasSetTracker *Tracker;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ AliasSetPrinter() : FunctionPass(ID) {
+ initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<AAResultsWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &AAWP = getAnalysis<AAResultsWrapperPass>();
+ Tracker = new AliasSetTracker(AAWP.getAAResults());
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ Tracker->add(&*I);
+ Tracker->print(errs());
+ delete Tracker;
+ return false;
+ }
+ };
+}
+
+char AliasSetPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
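+
+// Usage sketch (the flag name is taken from the registration string above;
+// the exact invocation is assumed, not stated in this file):
+//   opt -basicaa -print-alias-sets -disable-output input.ll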
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
new file mode 100644
index 0000000..9c1ac00
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -0,0 +1,124 @@
+//===-- Analysis.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+ initializeAAEvalPass(Registry);
+ initializeAliasSetPrinterPass(Registry);
+ initializeBasicAAWrapperPassPass(Registry);
+ initializeBlockFrequencyInfoWrapperPassPass(Registry);
+ initializeBranchProbabilityInfoWrapperPassPass(Registry);
+ initializeCallGraphWrapperPassPass(Registry);
+ initializeCallGraphPrinterPass(Registry);
+ initializeCallGraphViewerPass(Registry);
+ initializeCostModelAnalysisPass(Registry);
+ initializeCFGViewerPass(Registry);
+ initializeCFGPrinterPass(Registry);
+ initializeCFGOnlyViewerPass(Registry);
+ initializeCFGOnlyPrinterPass(Registry);
+ initializeCFLAAWrapperPassPass(Registry);
+ initializeDependenceAnalysisPass(Registry);
+ initializeDelinearizationPass(Registry);
+ initializeDemandedBitsPass(Registry);
+ initializeDivergenceAnalysisPass(Registry);
+ initializeDominanceFrontierPass(Registry);
+ initializeDomViewerPass(Registry);
+ initializeDomPrinterPass(Registry);
+ initializeDomOnlyViewerPass(Registry);
+ initializePostDomViewerPass(Registry);
+ initializeDomOnlyPrinterPass(Registry);
+ initializePostDomPrinterPass(Registry);
+ initializePostDomOnlyViewerPass(Registry);
+ initializePostDomOnlyPrinterPass(Registry);
+ initializeAAResultsWrapperPassPass(Registry);
+ initializeGlobalsAAWrapperPassPass(Registry);
+ initializeIVUsersPass(Registry);
+ initializeInstCountPass(Registry);
+ initializeIntervalPartitionPass(Registry);
+ initializeLazyValueInfoPass(Registry);
+ initializeLintPass(Registry);
+ initializeLoopInfoWrapperPassPass(Registry);
+ initializeMemDepPrinterPass(Registry);
+ initializeMemDerefPrinterPass(Registry);
+ initializeMemoryDependenceAnalysisPass(Registry);
+ initializeModuleDebugInfoPrinterPass(Registry);
+ initializeObjCARCAAWrapperPassPass(Registry);
+ initializePostDominatorTreePass(Registry);
+ initializeRegionInfoPassPass(Registry);
+ initializeRegionViewerPass(Registry);
+ initializeRegionPrinterPass(Registry);
+ initializeRegionOnlyViewerPass(Registry);
+ initializeRegionOnlyPrinterPass(Registry);
+ initializeSCEVAAWrapperPassPass(Registry);
+ initializeScalarEvolutionWrapperPassPass(Registry);
+ initializeTargetTransformInfoWrapperPassPass(Registry);
+ initializeTypeBasedAAWrapperPassPass(Registry);
+ initializeScopedNoAliasAAWrapperPassPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
+ raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
+ std::string Messages;
+ raw_string_ostream MsgsOS(Messages);
+
+ LLVMBool Result = verifyModule(*unwrap(M), OutMessages ? &MsgsOS : DebugOS);
+
+ // Duplicate the output to stderr.
+ if (DebugOS && OutMessages)
+ *DebugOS << MsgsOS.str();
+
+ if (Action == LLVMAbortProcessAction && Result)
+ report_fatal_error("Broken module found, compilation aborted!");
+
+ if (OutMessages)
+ *OutMessages = strdup(MsgsOS.str().c_str());
+
+ return Result;
+}
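+
+// A hedged usage sketch of the C API above (caller code assumed, not part of
+// this file):
+//   char *Err = NULL;
+//   if (LLVMVerifyModule(M, LLVMReturnStatusAction, &Err))
+//     fprintf(stderr, "broken module: %s\n", Err);
+//   LLVMDisposeMessage(Err);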
+
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+ LLVMBool Result = verifyFunction(
+ *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs()
+ : nullptr);
+
+ if (Action == LLVMAbortProcessAction && Result)
+ report_fatal_error("Broken function found, compilation aborted!");
+
+ return Result;
+}
+
+void LLVMViewFunctionCFG(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFG();
+}
+
+void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+ F->viewCFGOnly();
+}
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
new file mode 100644
index 0000000..f468a43
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -0,0 +1,140 @@
+//===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void AssumptionCache::scanFunction() {
+ assert(!Scanned && "Tried to scan the function twice!");
+ assert(AssumeHandles.empty() && "Already have assumes when scanning!");
+
+ // Go through all instructions in all blocks, add all calls to @llvm.assume
+ // to this cache.
+ for (BasicBlock &B : F)
+ for (Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ AssumeHandles.push_back(&II);
+
+ // Mark the scan as complete.
+ Scanned = true;
+}
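+
+// For illustration (hypothetical IR, not from this file), the scan collects
+// calls of the form:
+//   %cmp = icmp sgt i32 %n, 0
+//   call void @llvm.assume(i1 %cmp)
+// Each such call is appended to AssumeHandles; everything else is skipped.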
+
+void AssumptionCache::registerAssumption(CallInst *CI) {
+ assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
+ "Registered call does not call @llvm.assume");
+
+ // If we haven't scanned the function yet, just drop this assumption. It will
+ // be found when we scan later.
+ if (!Scanned)
+ return;
+
+ AssumeHandles.push_back(CI);
+
+#ifndef NDEBUG
+ assert(CI->getParent() &&
+ "Cannot register @llvm.assume call not in a basic block");
+ assert(&F == CI->getParent()->getParent() &&
+ "Cannot register @llvm.assume call not in this function");
+
+ // We expect the number of assumptions to be small, so in an asserts build
+ // check that we don't accumulate duplicates and that all assumptions point
+ // to the same function.
+ SmallPtrSet<Value *, 16> AssumptionSet;
+ for (auto &VH : AssumeHandles) {
+ if (!VH)
+ continue;
+
+ assert(&F == cast<Instruction>(VH)->getParent()->getParent() &&
+ "Cached assumption not inside this function!");
+ assert(match(cast<CallInst>(VH), m_Intrinsic<Intrinsic::assume>()) &&
+ "Cached something other than a call to @llvm.assume!");
+ assert(AssumptionSet.insert(VH).second &&
+ "Cache contains multiple copies of a call!");
+ }
+#endif
+}
+
+char AssumptionAnalysis::PassID;
+
+PreservedAnalyses AssumptionPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AssumptionCache &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ OS << "Cached assumptions for function: " << F.getName() << "\n";
+ for (auto &VH : AC.assumptions())
+ if (VH)
+ OS << " " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";
+
+ return PreservedAnalyses::all();
+}
+
+void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
+ auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
+ if (I != ACT->AssumptionCaches.end())
+ ACT->AssumptionCaches.erase(I);
+ // 'this' now dangles!
+}
+
+AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
+ // We probe the function map twice to try and avoid creating a value handle
+ // around the function in common cases. This makes insertion a bit slower,
+ // but if we have to insert we're going to scan the whole function so that
+ // shouldn't matter.
+ auto I = AssumptionCaches.find_as(&F);
+ if (I != AssumptionCaches.end())
+ return *I->second;
+
+ // Ok, build a new cache by scanning the function, insert it and the value
+ // handle into our map, and return the newly populated cache.
+ auto IP = AssumptionCaches.insert(std::make_pair(
+ FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F)));
+ assert(IP.second && "Scanning function already in the map?");
+ return *IP.first->second;
+}
+
+void AssumptionCacheTracker::verifyAnalysis() const {
+#ifndef NDEBUG
+ SmallPtrSet<const CallInst *, 4> AssumptionSet;
+ for (const auto &I : AssumptionCaches) {
+ for (auto &VH : I.second->assumptions())
+ if (VH)
+ AssumptionSet.insert(cast<CallInst>(VH));
+
+ for (const BasicBlock &B : cast<Function>(*I.first))
+ for (const Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ assert(AssumptionSet.count(cast<CallInst>(&II)) &&
+ "Assumption in scanned function not in cache");
+ }
+#endif
+}
+
+AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
+ initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
+}
+
+AssumptionCacheTracker::~AssumptionCacheTracker() {}
+
+INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
+ "Assumption Cache Tracker", false, true)
+char AssumptionCacheTracker::ID = 0;
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
new file mode 100644
index 0000000..85404d8
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -0,0 +1,1611 @@
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc.), but does no stateful analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+using namespace llvm;
+
+/// Enable analysis of recursive PHI nodes.
+static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
+ cl::init(false));
+
+/// SearchLimitReached / SearchTimes show how often the limit to decompose
+/// GEPs is reached. Hitting the limit reduces the precision of basic alias
+/// analysis.
+#define DEBUG_TYPE "basicaa"
+STATISTIC(SearchLimitReached, "Number of times the limit to "
+ "decompose GEPs is reached");
+STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
+
+/// Cutoff after which to stop analysing a set of phi nodes potentially involved
+/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// careful with value equivalence. We use reachability to make sure a value
+/// cannot be involved in a cycle.
+const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
+
+// The maximum search depth used by DecomposeGEPExpression() and
+// GetUnderlyingObject(). Both functions need to use the same search depth;
+// otherwise the algorithm in aliasGEP will assert.
+static const unsigned MaxLookupSearchDepth = 6;
+
+//===----------------------------------------------------------------------===//
+// Useful predicates
+//===----------------------------------------------------------------------===//
+
+/// Returns true if the pointer is to a function-local object that never
+/// escapes from the function.
+static bool isNonEscapingLocalObject(const Value *V) {
+ // If this is a local allocation, check to see if it escapes.
+ if (isa<AllocaInst>(V) || isNoAliasCall(V))
+    // Set StoreCaptures to true so that we can assume in our callers that the
+ // pointer is not the result of a load instruction. Currently
+ // PointerMayBeCaptured doesn't have any special analysis for the
+ // StoreCaptures=false case; if it did, our callers could be refined to be
+ // more precise.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ // If this is an argument that corresponds to a byval or noalias argument,
+ // then it has not escaped before entering the function. Check if it escapes
+ // inside the function.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr() || A->hasNoAliasAttr())
+ // Note even if the argument is marked nocapture we still need to check
+ // for copies made inside the function. The nocapture attribute only
+ // specifies that there are no copies made that outlive the function.
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+
+ return false;
+}
+
+/// Returns true if the pointer is one which would have been considered an
+/// escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ return true;
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ return false;
+}
+
+/// Returns the size of the object specified by V, or UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo &TLI,
+ bool RoundToAlign = false) {
+ uint64_t Size;
+ if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
+ return Size;
+ return MemoryLocation::UnknownSize;
+}
+
+/// Returns true if we can prove that the object specified by V is smaller than
+/// Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+ const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
+ // Note that the meanings of the "object" are slightly different in the
+ // following contexts:
+ // c1: llvm::getObjectSize()
+ // c2: llvm.objectsize() intrinsic
+ // c3: isObjectSmallerThan()
+ // c1 and c2 share the same meaning; however, the meaning of "object" in c3
+ // refers to the "entire object".
+ //
+ // Consider this example:
+ // char *p = (char*)malloc(100)
+ // char *q = p+80;
+ //
+ // In the context of c1 and c2, the "object" pointed by q refers to the
+ // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20.
+ //
+ // However, in the context of c3, the "object" refers to the chunk of memory
+  // being allocated. So, the "object" has 100 bytes, and q points to the
+  // middle of the "object". In case q is passed to isObjectSmallerThan() as
+  // the 1st parameter, before llvm::getObjectSize() is called to get the
+  // size of the entire object, we should:
+ // - either rewind the pointer q to the base-address of the object in
+ // question (in this case rewind to p), or
+  //   - just give up. It is up to the caller to make sure the pointer is
+  //     pointing to the base address of the object.
+  //
+  // We go for the 2nd option for simplicity.
+ if (!isIdentifiedObject(V))
+ return false;
+
+ // This function needs to use the aligned object size because we allow
+ // reads a bit past the end given sufficient alignment.
+ uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);
+
+ return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
+}
+
+/// Returns true if we can prove that the object specified by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
+ uint64_t ObjectSize = getObjectSize(V, DL, TLI);
+ return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+/// Analyzes the specified value as a linear expression: "A*V + B", where A and
+/// B are constant integers.
+///
+/// Returns the scale and offset values as APInts and return V as a Value*, and
+/// return whether we looked through any sign or zero extends. The incoming
+/// Value is known to have IntegerType and it may already be sign or zero
+/// extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+/*static*/ const Value *BasicAAResult::GetLinearExpression(
+ const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits,
+ unsigned &SExtBits, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (const ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
+    // If it's a constant, just convert it to an offset and remove the
+    // variable. If we've been called recursively, the Offset bit width will
+    // be greater than the constant's (the Offset is always as wide as the
+    // outermost call), so we'll zext here and process any extension in the
+    // isa<SExtInst> & isa<ZExtInst> cases below.
+ Offset += Const->getValue().zextOrSelf(Offset.getBitWidth());
+ assert(Scale == 0 && "Constant values don't have a scale");
+ return V;
+ }
+
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+
+      // If we've been called recursively, then Offset and Scale will be wider
+      // than the BOp operands. We'll always zext here, as we'll process sign
+      // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases).
+ APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth());
+
+ switch (BOp->getOpcode()) {
+ default:
+ // We don't understand this instruction, so we can't decompose it any
+ // further.
+ Scale = 1;
+ Offset = 0;
+ return V;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
+ BOp, DT)) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset += RHS;
+ break;
+ case Instruction::Sub:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset -= RHS;
+ break;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset *= RHS;
+ Scale *= RHS;
+ break;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
+ SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
+ Offset <<= RHS.getLimitedValue();
+ Scale <<= RHS.getLimitedValue();
+        // The semantics of nsw and nuw for left shifts don't match those of
+        // multiplications, so we won't propagate them.
+ NSW = NUW = false;
+ return V;
+ }
+
+ if (isa<OverflowingBinaryOperator>(BOp)) {
+ NUW &= BOp->hasNoUnsignedWrap();
+ NSW &= BOp->hasNoSignedWrap();
+ }
+ return V;
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned NewWidth = V->getType()->getPrimitiveSizeInBits();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits;
+ const Value *Result =
+ GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
+ Depth + 1, AC, DT, NSW, NUW);
+
+    // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
+ // by just incrementing the number of bits we've extended by.
+ unsigned ExtendedBy = NewWidth - SmallWidth;
+
+ if (isa<SExtInst>(V) && ZExtBits == 0) {
+ // sext(sext(%x, a), b) == sext(%x, a + b)
+
+ if (NSW) {
+ // We haven't sign-wrapped, so it's valid to decompose sext(%x + c)
+ // into sext(%x) + sext(c). We'll sext the Offset ourselves:
+ unsigned OldWidth = Offset.getBitWidth();
+ Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth);
+ } else {
+ // We may have signed-wrapped, so don't decompose sext(%x + c) into
+ // sext(%x) + sext(c)
+ Scale = 1;
+ Offset = 0;
+ Result = CastOp;
+ ZExtBits = OldZExtBits;
+ SExtBits = OldSExtBits;
+ }
+ SExtBits += ExtendedBy;
+ } else {
+ // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b)
+
+ if (!NUW) {
+ // We may have unsigned-wrapped, so don't decompose zext(%x + c) into
+ // zext(%x) + zext(c)
+ Scale = 1;
+ Offset = 0;
+ Result = CastOp;
+ ZExtBits = OldZExtBits;
+ SExtBits = OldSExtBits;
+ }
+ ZExtBits += ExtendedBy;
+ }
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
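+
+// A worked example (hypothetical IR, names illustrative; assumes the caller
+// seeds NSW/NUW with true, as DecomposeGEPExpression does):
+//   %a = add nsw i64 %x, 4
+//   %v = mul nsw i64 %a, 2
+// Decomposing %v bottoms out at %x; the add level yields Scale = 1 and
+// Offset = 4, and the mul level doubles both, so %v == 2*%x + 8 with NSW
+// still true because both operators carry nsw.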
+
+/// If V is a symbolic pointer expression, decompose it into a base pointer
+/// with a constant offset and a number of scaled symbolic offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale
+/// in the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As
+/// such, the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When DataLayout is around, this function is capable of analyzing everything
+/// that GetUnderlyingObject can look through. To be able to do that,
+/// GetUnderlyingObject and DecomposeGEPExpression must use the same search
+/// depth (MaxLookupSearchDepth). When DataLayout is not around, it just looks
+/// through pointer casts.
+/*static*/ const Value *BasicAAResult::DecomposeGEPExpression(
+ const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached,
+ const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = MaxLookupSearchDepth;
+ MaxLookupReached = false;
+ SearchTimes++;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (!Op) {
+ // The only non-operator case we can handle are GlobalAliases.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast ||
+ Op->getOpcode() == Instruction::AddrSpaceCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (!GEPOp) {
+ // If it's not a GEP, hand it off to SimplifyInstruction to see if it
+ // can come up with something. This matches what GetUnderlyingObject does.
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and AssumptionCache and use them here
+ // (these are both now available in this function, but this should be
+ // updated when GetUnderlyingObject is updated). TLI should be
+ // provided also.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
+ return V;
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
+    // Walk the indices of the GEP, accumulating them into BaseOffs/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
+ I != E; ++I) {
+ const Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0)
+ continue;
+
+ BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero())
+ continue;
+ BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = DL.getTypeAllocSize(*GTI);
+ unsigned ZExtBits = 0, SExtBits = 0;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = Index->getType()->getIntegerBitWidth();
+ unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ if (PointerSize > Width)
+ SExtBits += PointerSize - Width;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ bool NSW = true, NUW = true;
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
+ SExtBits, DL, 0, AC, DT, NSW, NUW);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getSExtValue() * Scale;
+ Scale *= IndexScale.getSExtValue();
+
+ // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits &&
+ VarIndices[i].SExtBits == SExtBits) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin() + i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64 - PointerSize) {
+ Scale <<= ShiftBits;
+ Scale = (int64_t)Scale >> ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
+ static_cast<int64_t>(Scale)};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ MaxLookupReached = true;
+ SearchLimitReached++;
+ return V;
+}
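+
+// Illustrative decomposition (hypothetical IR, assuming a 64-bit target):
+//   %p = getelementptr inbounds [10 x i32], [10 x i32]* %base, i64 0, i64 %i
+// returns %base with BaseOffs = 0 and a single VariableGEPIndex entry for %i
+// with Scale = 4, the allocation size of i32.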
+
+/// Returns whether the given pointer value points to memory that is local to
+/// the function, with global constants being considered local to all
+/// functions.
+bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+
+ unsigned MaxLookup = 8;
+ SmallVector<const Value *, 16> Worklist;
+ Worklist.push_back(Loc.Ptr);
+ do {
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+ if (!Visited.insert(V).second) {
+ Visited.clear();
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // An alloca instruction defines local memory.
+ if (OrLocal && isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in
+ // others. GV may even be a declaration, not a definition.
+ if (!GV->isConstant()) {
+ Visited.clear();
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ }
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup) {
+ Visited.clear();
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ }
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
+ continue;
+ }
+
+ // Otherwise be conservative.
+ Visited.clear();
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+
+ } while (!Worklist.empty() && --MaxLookup);
+
+ Visited.clear();
+ return Worklist.empty();
+}
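+
+// Sketch of the walk above (hypothetical IR, not from this file): for
+//   @g = constant i32 7
+//   %p = select i1 %c, i32* @g, i32* %a   ; %a is an alloca
+// both select operands go on the worklist; with OrLocal set, @g (a constant
+// global) and %a (an alloca) both pass, so the query returns true.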
+
+// FIXME: This code is duplicated with MemoryLocation and should be hoisted to
+// some common utility location.
+static bool isMemsetPattern16(const Function *MS,
+ const TargetLibraryInfo &TLI) {
+ if (TLI.has(LibFunc::memset_pattern16) &&
+ MS->getName() == "memset_pattern16") {
+ FunctionType *MemsetType = MS->getFunctionType();
+ if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+ isa<PointerType>(MemsetType->getParamType(0)) &&
+ isa<PointerType>(MemsetType->getParamType(1)) &&
+ isa<IntegerType>(MemsetType->getParamType(2)))
+ return true;
+ }
+ return false;
+}
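+
+// For reference (stated as an aid, not quoted from this file), the Darwin
+// prototype being matched is:
+//   void memset_pattern16(void *b, const void *pattern16, size_t len);
+// i.e. two pointer parameters and one integer, exactly the shape tested
+// above.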
+
+/// Returns the behavior when calling the given call site.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return FMRB_DoesNotAccessMemory;
+
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = FMRB_OnlyReadsMemory;
+
+ if (CS.onlyAccessesArgMemory())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
+
+  // The AAResultBase base class has some smarts; let's use them.
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+/// Returns the behavior when calling the given function. For use when the call
+/// site is not known.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
+ // If the function declares it doesn't access memory, we can't do better.
+ if (F->doesNotAccessMemory())
+ return FMRB_DoesNotAccessMemory;
+
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ // If the function declares it only reads memory, go with that.
+ if (F->onlyReadsMemory())
+ Min = FMRB_OnlyReadsMemory;
+
+ if (F->onlyAccessesArgMemory())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
+
+ // Otherwise be conservative.
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+}
+
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ // We don't currently have a writeonly attribute. All other properties
+ // of these intrinsics are nicely described via attributes in
+ // Intrinsics.td and handled generically below.
+ if (ArgIdx == 0)
+ return MRI_Mod;
+ }
+
+ // We can bound the aliasing properties of memset_pattern16 just as we can
+ // for memcpy/memset. This is particularly important because the
+ // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+ // whenever possible. Note that all but the missing writeonly attribute are
+ // handled via InferFunctionAttr.
+ if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
+ if (ArgIdx == 0)
+ return MRI_Mod;
+
+ if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+ return MRI_Ref;
+
+ if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+ return MRI_NoModRef;
+
+ return AAResultBase::getArgModRefInfo(CS, ArgIdx);
+}
+
+static bool isAssumeIntrinsic(ImmutableCallSite CS) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ return II && II->getIntrinsicID() == Intrinsic::assume;
+}
+
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return nullptr;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
+
+ // If we have a directly cached entry for these locations, we have recursed
+ // through this once, so just return the cached results. Notably, when this
+ // happens, we don't clear the cache.
+ auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
+ if (CacheIt != AliasCache.end())
+ return CacheIt->second;
+
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
+ LocB.Size, LocB.AATags);
+ // AliasCache rarely has more than 1 or 2 elements, always use
+ // shrink_and_clear so it quickly returns to the inline capacity of the
+ // SmallDenseMap if it ever grows larger.
+ // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+ AliasCache.shrink_and_clear();
+ VisitedPhiBBs.clear();
+ return Alias;
+}
+
+/// Checks to see if the specified callsite can clobber the specified memory
+/// object.
+///
+/// Since we only look at local properties of this function, we really can't
+/// say much about this query. We do, however, use simple "address taken"
+/// analysis on local objects.
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ "AliasAnalysis query involving multiple functions!");
+
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+
+ // If this is a tail call and Loc.Ptr points to a stack location, we know that
+ // the tail call cannot access or modify the local stack.
+ // We cannot exclude byval arguments here; these belong to the caller of
+  // the current function, not to the current function, and a tail callee
+ // may reference them.
+ if (isa<AllocaInst>(Object))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (CI->isTailCall())
+ return MRI_NoModRef;
+
+ // If the pointer is to a locally allocated object that does not escape,
+  // then the call cannot mod/ref the pointer unless the call takes the
+  // pointer as an argument, and itself doesn't capture it.
+ if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ isNonEscapingLocalObject(Object)) {
+ bool PassedAsArg = false;
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ AliasResult AR =
+ getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+ if (AR) {
+ PassedAsArg = true;
+ break;
+ }
+ }
+
+ if (!PassedAsArg)
+ return MRI_NoModRef;
+ }
+
+ // While the assume intrinsic is marked as arbitrarily writing so that
+ // proper control dependencies will be maintained, it never aliases any
+ // particular memory location.
+ if (isAssumeIntrinsic(CS))
+ return MRI_NoModRef;
+
+  // The AAResultBase base class has some smarts; let's use them.
+ return AAResultBase::getModRefInfo(CS, Loc);
+}
+
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // While the assume intrinsic is marked as arbitrarily writing so that
+ // proper control dependencies will be maintained, it never aliases any
+ // particular memory location.
+ if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
+ return MRI_NoModRef;
+
+  // The AAResultBase base class has some smarts; let's use them.
+ return AAResultBase::getModRefInfo(CS1, CS2);
+}
+
+/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
+/// both having the exact same pointer operand.
+static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
+ uint64_t V1Size,
+ const GEPOperator *GEP2,
+ uint64_t V2Size,
+ const DataLayout &DL) {
+
+ assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+ "Expected GEPs with the same pointer operand");
+
+ // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
+ // such that the struct field accesses provably cannot alias.
+ // We also need at least two indices (the pointer, and the struct field).
+ if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
+ GEP1->getNumIndices() < 2)
+ return MayAlias;
+
+ // If we don't know the size of the accesses through both GEPs, we can't
+ // determine whether the struct fields accessed can't alias.
+ if (V1Size == MemoryLocation::UnknownSize ||
+ V2Size == MemoryLocation::UnknownSize)
+ return MayAlias;
+
+ ConstantInt *C1 =
+ dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
+ ConstantInt *C2 =
+ dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
+
+  // If the last (struct) indices are constants and are equal, the other
+  // indices might also be dynamically equal, so the GEPs can alias.
+ if (C1 && C2 && C1 == C2)
+ return MayAlias;
+
+ // Find the last-indexed type of the GEP, i.e., the type you'd get if
+ // you stripped the last index.
+ // On the way, look at each indexed type. If there's something other
+ // than an array, different indices can lead to different final types.
+ SmallVector<Value *, 8> IntermediateIndices;
+
+ // Insert the first index; we don't need to check the type indexed
+ // through it as it only drops the pointer indirection.
+ assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
+ IntermediateIndices.push_back(GEP1->getOperand(1));
+
+ // Insert all the remaining indices but the last one.
+ // Also, check that they all index through arrays.
+ for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
+ if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices)))
+ return MayAlias;
+ IntermediateIndices.push_back(GEP1->getOperand(i + 1));
+ }
+
+ auto *Ty = GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices);
+ StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
+
+ if (isa<SequentialType>(Ty)) {
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a sequential
+ // type (array or pointer);
+ // - both GEPs only index through arrays prior to that.
+ //
+ // Because array indices greater than the number of elements are valid in
+ // GEPs, unless we know the intermediate indices are identical between
+ // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
+ // partially overlap. We also need to check that the loaded size matches
+ // the element size, otherwise we could still have overlap.
+ const uint64_t ElementSize =
+ DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType());
+ if (V1Size != ElementSize || V2Size != ElementSize)
+ return MayAlias;
+
+ for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
+ if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
+ return MayAlias;
+
+    // Now we know that the array/pointer that GEP1 indexes into and the one
+    // that GEP2 indexes into must either precisely overlap or be disjoint.
+ // Because they cannot partially overlap and because fields in an array
+ // cannot overlap, if we can prove the final indices are different between
+ // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
+
+ // If the last indices are constants, we've already checked they don't
+ // equal each other so we can exit early.
+ if (C1 && C2)
+ return NoAlias;
+ if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
+ GEP2->getOperand(GEP2->getNumOperands() - 1),
+ DL))
+ return NoAlias;
+ return MayAlias;
+ } else if (!LastIndexedStruct || !C1 || !C2) {
+ return MayAlias;
+ }
+
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a struct;
+ // - said indices are different, hence, the pointed-to fields are different;
+ // - both GEPs only index through arrays prior to that.
+ //
+ // This lets us determine that the struct that GEP1 indexes into and the
+ // struct that GEP2 indexes into must either precisely overlap or be
+ // completely disjoint. Because they cannot partially overlap, indexing into
+ // different non-overlapping fields of the struct will never alias.
+
+ // Therefore, the only remaining thing needed to show that both GEPs can't
+ // alias is that the fields are not overlapping.
+ const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
+ const uint64_t StructSize = SL->getSizeInBytes();
+ const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
+ const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
+
+ auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
+ uint64_t V2Off, uint64_t V2Size) {
+ return V1Off < V2Off && V1Off + V1Size <= V2Off &&
+ ((V2Off + V2Size <= StructSize) ||
+ (V2Off + V2Size - StructSize <= V1Off));
+ };
+
+ if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
+ EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
+ return NoAlias;
+
+ return MayAlias;
+}
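+
+// A hedged example of the struct path above (hypothetical IR):
+//   %a = getelementptr %struct.S, %struct.S* %p, i64 %i, i32 0
+//   %b = getelementptr %struct.S, %struct.S* %p, i64 %j, i32 1
+// The last indices are distinct constants selecting non-overlapping fields,
+// so as long as the access sizes fit inside those fields the result is
+// NoAlias, even though %i and %j are unknown.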
+
+/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
+/// another pointer.
+///
+/// We know that V1 is a GEP, but we don't know anything about V2.
+/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
+/// V2.
+AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const AAMDNodes &V1AAInfo, const Value *V2,
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1,
+ const Value *UnderlyingV2) {
+ int64_t GEP1BaseOffset;
+ bool GEP1MaxLookupReached;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+
+ // If we have two gep instructions with must-alias or not-alias'ing base
+ // pointers, figure out if the indexes to the GEP tell us anything about the
+ // derived pointer.
+ if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias =
+ aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(),
+ UnderlyingV2, MemoryLocation::UnknownSize, AAMDNodes());
+
+ // Check for geps of non-aliasing underlying pointers where the offsets are
+ // identical.
+ if ((BaseAlias == MayAlias) && V1Size == V2Size) {
+ // Do the base pointers alias assuming type and size.
+ AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
+ UnderlyingV2, V2Size, V2AAInfo);
+ if (PreciseBaseAlias == NoAlias) {
+ // See if the computed offset from the common pointer tells us about the
+ // relation of the resulting pointer.
+ int64_t GEP2BaseOffset;
+ bool GEP2MaxLookupReached;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+ GEP2MaxLookupReached, DL, &AC, DT);
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, &AC, DT);
+ // DecomposeGEPExpression and GetUnderlyingObject should return the
+ // same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an
+ // assert.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ return MayAlias;
+ }
+        // If the max search depth is reached, the result is undefined.
+ if (GEP2MaxLookupReached || GEP1MaxLookupReached)
+ return MayAlias;
+
+ // Same offsets.
+ if (GEP1BaseOffset == GEP2BaseOffset &&
+ GEP1VariableIndices == GEP2VariableIndices)
+ return NoAlias;
+ GEP1VariableIndices.clear();
+ }
+ }
+
+ // If we get a No or May, then return it immediately, no amount of analysis
+ // will improve this situation.
+ if (BaseAlias != MustAlias)
+ return BaseAlias;
+
+ // Otherwise, we have a MustAlias. Since the base pointers alias each other
+ // exactly, see if the computed offset from the common pointer tells us
+ // about the relation of the resulting pointer.
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, &AC, DT);
+
+ int64_t GEP2BaseOffset;
+ bool GEP2MaxLookupReached;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+ const Value *GEP2BasePtr =
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+ GEP2MaxLookupReached, DL, &AC, DT);
+
+ // DecomposeGEPExpression and GetUnderlyingObject should return the
+ // same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an assert.
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+ return MayAlias;
+ }
+
+ // If we know the two GEPs are based off of the exact same pointer (and not
+ // just the same underlying object), see if that tells us anything about
+ // the resulting pointers.
+ if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+ AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
+ // If we couldn't find anything interesting, don't abandon just yet.
+ if (R != MayAlias)
+ return R;
+ }
+
+    // If the max search depth is reached, the result is undefined.
+ if (GEP2MaxLookupReached || GEP1MaxLookupReached)
+ return MayAlias;
+
+ // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+ // symbolic difference.
+ GEP1BaseOffset -= GEP2BaseOffset;
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+ } else {
+ // Check to see if these two pointers are related by the getelementptr
+ // instruction. If one pointer is a GEP with a non-zero index of the other
+ // pointer, we know they cannot alias.
+
+ // If both accesses are unknown size, we can't do anything useful here.
+ if (V1Size == MemoryLocation::UnknownSize &&
+ V2Size == MemoryLocation::UnknownSize)
+ return MayAlias;
+
+ AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
+ AAMDNodes(), V2, V2Size, V2AAInfo);
+ if (R != MustAlias)
+      // If V2 may alias the GEP base pointer, conservatively return MayAlias.
+ // If V2 is known not to alias GEP base pointer, then the two values
+ // cannot alias per GEP semantics: "A pointer value formed from a
+ // getelementptr instruction is associated with the addresses associated
+ // with the first operand of the getelementptr".
+ return R;
+
+ const Value *GEP1BasePtr =
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, &AC, DT);
+
+ // DecomposeGEPExpression and GetUnderlyingObject should return the
+ // same result except when DecomposeGEPExpression has no DataLayout.
+ // FIXME: They always have a DataLayout so this should become an assert.
+ if (GEP1BasePtr != UnderlyingV1) {
+ return MayAlias;
+ }
+    // If the max search depth is reached, the result is undefined.
+ if (GEP1MaxLookupReached)
+ return MayAlias;
+ }
+
+  // In the two-GEP case, if there is no difference in the offsets of the
+  // computed pointers, the resulting pointers are a must alias. This
+  // happens when we have two lexically identical GEPs (for example).
+  //
+  // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+  // must-aliases the GEP, the end result is a must alias also.
+ if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+ return MustAlias;
+
+ // If there is a constant difference between the pointers, but the difference
+ // is less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping. If the difference is
+ // greater, we know they do not overlap.
+ if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+ if (GEP1BaseOffset >= 0) {
+ if (V2Size != MemoryLocation::UnknownSize) {
+ if ((uint64_t)GEP1BaseOffset < V2Size)
+ return PartialAlias;
+ return NoAlias;
+ }
+ } else {
+ // We have the situation where:
+ // + +
+ // | BaseOffset |
+ // ---------------->|
+ // |-->V1Size |-------> V2Size
+ // GEP1 V2
+    // We need to know that V2Size is not unknown; otherwise we might have
+    // stripped a gep with a negative index ('gep <ptr>, -1, ...').
+ if (V1Size != MemoryLocation::UnknownSize &&
+ V2Size != MemoryLocation::UnknownSize) {
+ if (-(uint64_t)GEP1BaseOffset < V1Size)
+ return PartialAlias;
+ return NoAlias;
+ }
+ }
+ }
+
+ if (!GEP1VariableIndices.empty()) {
+ uint64_t Modulo = 0;
+ bool AllPositive = true;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales. We
+ // don't need std::abs here (even if the scale's negative) as we'll
+ // be ^'ing Modulo with itself later.
+ Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+
+ if (AllPositive) {
+ // If the Value could change between cycles, then any reasoning about
+ // the Value this cycle may not hold in the next cycle. We'll just
+ // give up if we can't determine conditions that hold for every cycle:
+ const Value *V = GEP1VariableIndices[i].V;
+
+ bool SignKnownZero, SignKnownOne;
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ 0, &AC, nullptr, DT);
+
+ // Zero-extension widens the variable, and so forces the sign
+ // bit to zero.
+ bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
+ SignKnownZero |= IsZExt;
+ SignKnownOne &= !IsZExt;
+
+ // If the variable begins with a zero then we know it's
+ // positive, regardless of whether the value is signed or
+ // unsigned.
+ int64_t Scale = GEP1VariableIndices[i].Scale;
+ AllPositive =
+ (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+ }
+ }
+
+ Modulo = Modulo ^ (Modulo & (Modulo - 1));
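+
+    // Worked example (hypothetical values): if the scales were 20 (0b10100)
+    // and 8 (0b01000), Modulo would be 0b11100 before the line above and
+    // 0b00100 == 4 after it, i.e. the least significant set bit across all
+    // the scales.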
+
+ // We can compute the difference between the two addresses
+ // mod Modulo. Check whether that difference guarantees that the
+ // two locations do not alias.
+ uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+ if (V1Size != MemoryLocation::UnknownSize &&
+ V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
+ V1Size <= Modulo - ModOffset)
+ return NoAlias;
+
+ // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+ // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+ // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+ if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+ return NoAlias;
+
+ if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size,
+ GEP1BaseOffset, &AC, DT))
+ return NoAlias;
+ }
+
+ // Statically, we can see that the base objects are the same, but the
+ // pointers have dynamic offsets which we can't resolve. And none of our
+ // little tricks above worked.
+ //
+ // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
+ // practical effect of this is protecting TBAA in the case of dynamic
+ // indices into arrays of unions or malloc'd memory.
+ return PartialAlias;
+}
+
+static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
+ // If the results agree, take it.
+ if (A == B)
+ return A;
+ // A mix of PartialAlias and MustAlias is PartialAlias.
+ if ((A == PartialAlias && B == MustAlias) ||
+ (B == PartialAlias && A == MustAlias))
+ return PartialAlias;
+ // Otherwise, we don't know anything.
+ return MayAlias;
+}
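+
+// The merge table implied above, spelled out for reference:
+//   equal results            -> that result
+//   PartialAlias + MustAlias -> PartialAlias
+//   any other pair (anything with MayAlias, or NoAlias with Must/Partial)
+//                            -> MayAlias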
+
+/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
+/// against another.
+AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const AAMDNodes &SIAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const AAMDNodes &V2AAInfo) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for aliases between the values on corresponding arms.
+ if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+ if (SI->getCondition() == SI2->getCondition()) {
+ AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+ SI2->getTrueValue(), V2Size, V2AAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+ AliasResult ThisAlias =
+ aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+ SI2->getFalseValue(), V2Size, V2AAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+ }
+
+  // If both arms of the Select node NoAlias or MustAlias V2, then return
+  // NoAlias / MustAlias. Otherwise, return MayAlias.
+ AliasResult Alias =
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+ if (Alias == MayAlias)
+ return MayAlias;
+
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+ return MergeAliasResults(ThisAlias, Alias);
+}
+
+/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against
+/// another.
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const AAMDNodes &PNAAInfo, const Value *V2,
+ uint64_t V2Size,
+ const AAMDNodes &V2AAInfo) {
+ // Track phi nodes we have visited. We use this information when we determine
+ // value equivalence.
+ VisitedPhiBBs.insert(PN->getParent());
+
+ // If the values are PHIs in the same block, we can do a more precise
+ // as well as efficient check: just check for aliases between the values
+ // on corresponding edges.
+ if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+ if (PN2->getParent() == PN->getParent()) {
+ LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
+ if (PN > V2)
+ std::swap(Locs.first, Locs.second);
+ // Analyse the PHIs' inputs under the assumption that the PHIs are
+ // NoAlias.
+ // If the PHIs are May/MustAlias there must be (recursively) an input
+ // operand from outside the PHIs' cycle that is MayAlias/MustAlias or
+ // there must be an operation on the PHIs within the PHIs' value cycle
+ // that causes a MayAlias.
+ // Pretend the phis do not alias.
+ AliasResult Alias = NoAlias;
+ assert(AliasCache.count(Locs) &&
+ "There must exist an entry for the phi node");
+ AliasResult OrigAliasResult = AliasCache[Locs];
+ AliasCache[Locs] = NoAlias;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ AliasResult ThisAlias =
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+ V2Size, V2AAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+
+ // Reset if speculation failed.
+ if (Alias != NoAlias)
+ AliasCache[Locs] = OrigAliasResult;
+
+ return Alias;
+ }
+
+ SmallPtrSet<Value *, 4> UniqueSrc;
+ SmallVector<Value *, 4> V1Srcs;
+ bool isRecursive = false;
+ for (Value *PV1 : PN->incoming_values()) {
+ if (isa<PHINode>(PV1))
+      // If any source is itself a PHI, return MayAlias conservatively to
+      // avoid compile-time explosion. The worst case is when both sides are
+      // PHI nodes, in which case this is O(m x n) time where 'm' and 'n' are
+      // the numbers of PHI sources.
+ return MayAlias;
+
+ if (EnableRecPhiAnalysis)
+ if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+ // Check whether the incoming value is a GEP that advances the pointer
+ // result of this PHI node (e.g. in a loop). If this is the case, we
+ // would recurse and always get a MayAlias. Handle this case specially
+ // below.
+ if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+ isa<ConstantInt>(PV1GEP->idx_begin())) {
+ isRecursive = true;
+ continue;
+ }
+ }
+
+ if (UniqueSrc.insert(PV1).second)
+ V1Srcs.push_back(PV1);
+ }
+
+ // If this PHI node is recursive, set the size of the accessed memory to
+ // unknown to represent all the possible values the GEP could advance the
+ // pointer to.
+ if (isRecursive)
+ PNSize = MemoryLocation::UnknownSize;
+
+  // If V1Srcs is empty, the phi has no non-phi incoming values; this should
+  // only be possible in blocks unreachable from the entry block, but return
+  // MayAlias conservatively rather than index into an empty vector.
+  if (V1Srcs.empty())
+    return MayAlias;
+
+  AliasResult Alias =
+      aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+
+ // Early exit if the check of the first PHI source against V2 is MayAlias.
+ // Other results are not possible.
+ if (Alias == MayAlias)
+ return MayAlias;
+
+  // If all sources of the PHI node NoAlias or MustAlias V2, then return
+  // NoAlias / MustAlias. Otherwise, return MayAlias.
+ for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+ Value *V = V1Srcs[i];
+
+ AliasResult ThisAlias =
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
+ Alias = MergeAliasResults(ThisAlias, Alias);
+ if (Alias == MayAlias)
+ break;
+ }
+
+ return Alias;
+}
+
+/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
+/// array references.
+AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
+ AAMDNodes V1AAInfo, const Value *V2,
+ uint64_t V2Size, AAMDNodes V2AAInfo) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are.
+ if (V1Size == 0 || V2Size == 0)
+ return NoAlias;
+
+ // Strip off any casts if they exist.
+ V1 = V1->stripPointerCasts();
+ V2 = V2->stripPointerCasts();
+
+ // If V1 or V2 is undef, the result is NoAlias because we can always pick a
+ // value for undef that aliases nothing in the program.
+ if (isa<UndefValue>(V1) || isa<UndefValue>(V2))
+ return NoAlias;
+
+ // Are we checking for alias of the same value?
+ // Because we look 'through' phi nodes we could look at "Value" pointers from
+ // different iterations. We must therefore make sure that this is not the
+ // case. The function isValueEqualInPotentialCycles ensures that this cannot
+ // happen by looking at the visited phi nodes and making sure they cannot
+ // reach the value.
+ if (isValueEqualInPotentialCycles(V1, V2))
+ return MustAlias;
+
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
+ return NoAlias; // Scalars cannot alias each other
+
+ // Figure out what objects these things are pointing to if we can.
+ const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+ const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
+ if (O1 != O2) {
+ // If V1/V2 point to two different objects we know that we have no alias.
+ if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+ return NoAlias;
+
+ // Constant pointers can't alias with non-const isIdentifiedObject objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
+    // Function arguments can't alias with things that are known to be
+    // unambiguously identified at the function level.
+ if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) ||
+ (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1)))
+ return NoAlias;
+
+ // Most objects can't alias null.
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+ return NoAlias;
+
+ // If one pointer is the result of a call/invoke or load and the other is a
+ // non-escaping local object within the same function, then we know the
+ // object couldn't escape to a point where the call could return it.
+ //
+    // Note that if the pointers are in different functions, there are a
+    // variety of complications. A call with a nocapture argument may still
+    // temporarily store the nocapture argument's value in a memory location
+    // that doesn't escape. Or it may pass a nocapture value to other
+    // functions as long as they don't capture it.
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+
+ // If the size of one access is larger than the entire object on the other
+ // side, then we know such behavior is undefined and can assume no alias.
+ if ((V1Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
+ (V2Size != MemoryLocation::UnknownSize &&
+ isObjectSmallerThan(O1, V2Size, DL, TLI)))
+ return NoAlias;
+
+ // Check the cache before climbing up use-def chains. This also terminates
+ // otherwise infinitely recursive queries.
+ LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
+ if (V1 > V2)
+ std::swap(Locs.first, Locs.second);
+ std::pair<AliasCacheTy::iterator, bool> Pair =
+ AliasCache.insert(std::make_pair(Locs, MayAlias));
+ if (!Pair.second)
+ return Pair.first->second;
+
+  // FIXME: This isn't aggressively handling alias(GEP, PHI); for example, if
+  // the GEP can't simplify, we don't even look at the PHI cases.
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(O1, O2);
+ std::swap(V1AAInfo, V2AAInfo);
+ }
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+ AliasResult Result =
+ aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
+ }
+
+ if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(V1AAInfo, V2AAInfo);
+ }
+ if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
+ }
+
+ if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+ std::swap(V1, V2);
+ std::swap(V1Size, V2Size);
+ std::swap(V1AAInfo, V2AAInfo);
+ }
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+ AliasResult Result =
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ if (Result != MayAlias)
+ return AliasCache[Locs] = Result;
+ }
+
+  // If both pointers are pointing into the same object and one of the
+  // accesses covers the entire object, then the accesses must overlap in
+  // some way.
+ if (O1 == O2)
+ if ((V1Size != MemoryLocation::UnknownSize &&
+ isObjectSize(O1, V1Size, DL, TLI)) ||
+ (V2Size != MemoryLocation::UnknownSize &&
+ isObjectSize(O2, V2Size, DL, TLI)))
+ return AliasCache[Locs] = PartialAlias;
+
+ // Recurse back into the best AA results we have, potentially with refined
+ // memory locations. We have already ensured that BasicAA has a MayAlias
+ // cache result for these, so any recursion back into BasicAA won't loop.
+ AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
+ return AliasCache[Locs] = Result;
+}
+
+/// Check whether two Values can be considered equivalent.
+///
+/// In addition to pointer equivalence of \p V1 and \p V2 this checks whether
+/// they cannot be part of a cycle in the value graph by looking at all
+/// visited phi nodes and making sure that the phis cannot reach the value. We
+/// have to do this because we are looking through phi nodes (that is, we say
+/// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
+ const Value *V2) {
+ if (V != V2)
+ return false;
+
+ const Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ if (VisitedPhiBBs.empty())
+ return true;
+
+ if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
+ return false;
+
+ // Make sure that the visited phis cannot reach the Value. This ensures that
+ // the Values cannot come from different iterations of a potential cycle the
+ // phi nodes could be involved in.
+ for (auto *P : VisitedPhiBBs)
+ if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
+ return false;
+
+ return true;
+}
+
+/// Computes the symbolic difference between two de-composed GEPs.
+///
+/// Dest and Src are the variable indices from two decomposed GetElementPtr
+/// instructions GEP1 and GEP2 which have common base pointers.
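+///
+/// For example (illustrative): with Dest = {%x * 4} and Src = {%x * 1}, the
+/// matching entry is updated in place, leaving Dest = {%x * 3}; any Src entry
+/// with no match in Dest is appended with its scale negated.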
+void BasicAAResult::GetIndexDifference(
+ SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty())
+ return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
+ Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
+ continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin() + j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
+ Dest.push_back(Entry);
+ }
+ }
+}
+
+bool BasicAAResult::constantOffsetHeuristic(
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
+ uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
+ V2Size == MemoryLocation::UnknownSize)
+ return false;
+
+ const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+
+ if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
+ Var0.Scale != -Var1.Scale)
+ return false;
+
+ unsigned Width = Var1.V->getType()->getIntegerBitWidth();
+
+  // We'll strip off the extensions of Var0 and Var1 and do another round
+  // of GetLinearExpression decomposition. For example, if Var0 is
+  // zext(%x + 1) we should get V0 == %x and V0Offset == 1.
+
+ APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0),
+ V1Offset(Width, 0);
+ bool NSW = true, NUW = true;
+ unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
+ const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
+ V0SExtBits, DL, 0, AC, DT, NSW, NUW);
+ NSW = true, NUW = true;
+ const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
+ V1SExtBits, DL, 0, AC, DT, NSW, NUW);
+
+ if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits ||
+ V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1))
+ return false;
+
+ // We have a hit - Var0 and Var1 only differ by a constant offset!
+
+ // If we've been sext'ed then zext'd the maximum difference between Var0 and
+ // Var1 is possible to calculate, but we're just interested in the absolute
+ // minimum difference between the two. The minimum distance may occur due to
+ // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so
+ // the minimum distance between %i and %i + 5 is 3.
+ APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
+ MinDiff = APIntOps::umin(MinDiff, Wrapped);
+ uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+
+ // We can't definitely say whether GEP1 is before or after V2 due to wrapping
+ // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
+ // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
+ // V2Size can fit in the MinDiffBytes gap.
+ return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
+ V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+char BasicAA::PassID;
+
+BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return BasicAAResult(F.getParent()->getDataLayout(),
+ AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F),
+ AM->getCachedResult<DominatorTreeAnalysis>(F),
+ AM->getCachedResult<LoopAnalysis>(F));
+}
+
+BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char BasicAAWrapperPass::ID = 0;
+void BasicAAWrapperPass::anchor() {}
+
+INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)", true, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
+ "Basic Alias Analysis (stateless AA impl)", true, true)
+
+FunctionPass *llvm::createBasicAAWrapperPass() {
+ return new BasicAAWrapperPass();
+}
+
+bool BasicAAWrapperPass::runOnFunction(Function &F) {
+ auto &ACT = getAnalysis<AssumptionCacheTracker>();
+ auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+
+ Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
+ ACT.getAssumptionCache(F),
+ DTWP ? &DTWP->getDomTree() : nullptr,
+ LIWP ? &LIWP->getLoopInfo() : nullptr));
+
+ return false;
+}
+
+void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
+ return BasicAAResult(
+ F.getParent()->getDataLayout(),
+ P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+}
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
new file mode 100644
index 0000000..90b7a33
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -0,0 +1,213 @@
+//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "block-freq"
+
+#ifndef NDEBUG
+enum GVDAGType {
+ GVDT_None,
+ GVDT_Fraction,
+ GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden,
+            cl::desc("Pop up a window to show a dag displaying how block "
+                     "frequencies propagate through the CFG."),
+ cl::values(
+ clEnumValN(GVDT_None, "none",
+ "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<BlockFrequencyInfo *> {
+ typedef const BasicBlock NodeType;
+ typedef succ_const_iterator ChildIteratorType;
+ typedef Function::const_iterator nodes_iterator;
+
+ static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ return &G->getFunction()->front();
+ }
+ static ChildIteratorType child_begin(const NodeType *N) {
+ return succ_begin(N);
+ }
+ static ChildIteratorType child_end(const NodeType *N) {
+ return succ_end(N);
+ }
+ static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
+ return G->getFunction()->end();
+ }
+};
+
+template<>
+struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const BlockFrequencyInfo *G) {
+ return G->getFunction()->getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node,
+ const BlockFrequencyInfo *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << Node->getName() << ":";
+ switch (ViewBlockFreqPropagationDAG) {
+ case GVDT_Fraction:
+ Graph->printBlockFreq(OS, Node);
+ break;
+ case GVDT_Integer:
+ OS << Graph->getBlockFreq(Node).getFrequency();
+ break;
+ case GVDT_None:
+ llvm_unreachable("If we are not supposed to render a graph we should "
+ "never reach this point.");
+ }
+
+ return Result;
+ }
+};
+
+} // end namespace llvm
+#endif
+
+BlockFrequencyInfo::BlockFrequencyInfo() {}
+
+BlockFrequencyInfo::BlockFrequencyInfo(const Function &F,
+ const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI) {
+ calculate(F, BPI, LI);
+}
+
+void BlockFrequencyInfo::calculate(const Function &F,
+ const BranchProbabilityInfo &BPI,
+ const LoopInfo &LI) {
+ if (!BFI)
+ BFI.reset(new ImplType);
+ BFI->calculate(F, BPI, LI);
+#ifndef NDEBUG
+ if (ViewBlockFreqPropagationDAG != GVDT_None)
+ view();
+#endif
+}
+
+BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
+ return BFI ? BFI->getBlockFreq(BB) : 0;
+}
+
+void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB,
+ uint64_t Freq) {
+ assert(BFI && "Expected analysis to be available");
+ BFI->setBlockFreq(BB, Freq);
+}
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void BlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+#else
+ errs() << "BlockFrequencyInfo::view is only available in debug builds on "
+ "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+const Function *BlockFrequencyInfo::getFunction() const {
+ return BFI ? BFI->getFunction() : nullptr;
+}
+
+raw_ostream &BlockFrequencyInfo::
+printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const {
+ return BFI ? BFI->printBlockFreq(OS, Freq) : OS;
+}
+
+raw_ostream &
+BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+ const BasicBlock *BB) const {
+ return BFI ? BFI->printBlockFreq(OS, BB) : OS;
+}
+
+uint64_t BlockFrequencyInfo::getEntryFreq() const {
+ return BFI ? BFI->getEntryFreq() : 0;
+}
+
+void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
+
+void BlockFrequencyInfo::print(raw_ostream &OS) const {
+ if (BFI)
+ BFI->print(OS);
+}
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq",
+ "Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq",
+ "Block Frequency Analysis", true, true)
+
+char BlockFrequencyInfoWrapperPass::ID = 0;
+
+BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass()
+ : FunctionPass(ID) {
+ initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {}
+
+void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
+ BFI.print(OS);
+}
+
+void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); }
+
+bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI =
+ getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BFI.calculate(F, BPI, LI);
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
new file mode 100644
index 0000000..48e23af
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -0,0 +1,769 @@
+//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+using namespace llvm;
+using namespace llvm::bfi_detail;
+
+#define DEBUG_TYPE "block-freq"
+
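+// A full mass converts to exactly 1.0; anything smaller converts to
+// (Mass + 1) * 2^-64.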
+ScaledNumber<uint64_t> BlockMass::toScaled() const {
+ if (isFull())
+ return ScaledNumber<uint64_t>(1, 0);
+ return ScaledNumber<uint64_t>(getMass() + 1, -64);
+}
+
+void BlockMass::dump() const { print(dbgs()); }
+
+static char getHexDigit(int N) {
+ assert(N < 16);
+ if (N < 10)
+ return '0' + N;
+ return 'a' + N - 10;
+}
+raw_ostream &BlockMass::print(raw_ostream &OS) const {
+ for (int Digits = 0; Digits < 16; ++Digits)
+ OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
+ return OS;
+}
+
+namespace {
+
+typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+typedef BlockFrequencyInfoImplBase::Distribution Distribution;
+typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
+typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
+typedef BlockFrequencyInfoImplBase::LoopData LoopData;
+typedef BlockFrequencyInfoImplBase::Weight Weight;
+typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
+
+/// \brief Dithering mass distributer.
+///
+/// This class splits up a single mass into portions by weight, dithering to
+/// spread out error. No mass is lost. The dithering precision depends on the
+/// precision of the product of \a BlockMass and \a BranchProbability.
+///
+/// The distribution algorithm follows.
+///
+/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
+/// mass to distribute in \a RemMass.
+///
+/// 2. For each portion:
+///
+/// 1. Construct a branch probability, P, as the portion's weight divided
+/// by the current value of \a RemWeight.
+/// 2. Calculate the portion's mass as \a RemMass times P.
+/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
+/// the current portion's weight and mass.
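+///
+/// For example (illustrative): distributing a full mass over weights {3, 1},
+/// the first portion takes 3/4 of the mass and the second takes everything
+/// that remains (weight 1 over remaining weight 1), so no mass is lost to
+/// rounding.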
+struct DitheringDistributer {
+ uint32_t RemWeight;
+ BlockMass RemMass;
+
+ DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
+
+ BlockMass takeMass(uint32_t Weight);
+};
+
+} // end namespace
+
+DitheringDistributer::DitheringDistributer(Distribution &Dist,
+ const BlockMass &Mass) {
+ Dist.normalize();
+ RemWeight = Dist.Total;
+ RemMass = Mass;
+}
+
+BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
+ assert(Weight && "invalid weight");
+ assert(Weight <= RemWeight);
+ BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
+
+ // Decrement totals (dither).
+ RemWeight -= Weight;
+ RemMass -= Mass;
+ return Mass;
+}
+
+void Distribution::add(const BlockNode &Node, uint64_t Amount,
+ Weight::DistType Type) {
+ assert(Amount && "invalid weight of 0");
+ uint64_t NewTotal = Total + Amount;
+
+ // Check for overflow. It should be impossible to overflow twice.
+ bool IsOverflow = NewTotal < Total;
+ assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
+ DidOverflow |= IsOverflow;
+
+ // Update the total.
+ Total = NewTotal;
+
+ // Save the weight.
+ Weights.push_back(Weight(Type, Node, Amount));
+}
+
+static void combineWeight(Weight &W, const Weight &OtherW) {
+ assert(OtherW.TargetNode.isValid());
+ if (!W.Amount) {
+ W = OtherW;
+ return;
+ }
+ assert(W.Type == OtherW.Type);
+ assert(W.TargetNode == OtherW.TargetNode);
+ assert(OtherW.Amount && "Expected non-zero weight");
+ if (W.Amount > W.Amount + OtherW.Amount)
+ // Saturate on overflow.
+ W.Amount = UINT64_MAX;
+ else
+ W.Amount += OtherW.Amount;
+}
+static void combineWeightsBySorting(WeightList &Weights) {
+ // Sort so edges to the same node are adjacent.
+ std::sort(Weights.begin(), Weights.end(),
+ [](const Weight &L,
+ const Weight &R) { return L.TargetNode < R.TargetNode; });
+
+ // Combine adjacent edges.
+ WeightList::iterator O = Weights.begin();
+ for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
+ ++O, (I = L)) {
+ *O = *I;
+
+ // Find the adjacent weights to the same node.
+ for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
+ combineWeight(*O, *L);
+ }
+
+ // Erase extra entries.
+ Weights.erase(O, Weights.end());
+}
+static void combineWeightsByHashing(WeightList &Weights) {
+ // Collect weights into a DenseMap.
+ typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
+ HashTable Combined(NextPowerOf2(2 * Weights.size()));
+ for (const Weight &W : Weights)
+ combineWeight(Combined[W.TargetNode.Index], W);
+
+ // Check whether anything changed.
+ if (Weights.size() == Combined.size())
+ return;
+
+ // Fill in the new weights.
+ Weights.clear();
+ Weights.reserve(Combined.size());
+ for (const auto &I : Combined)
+ Weights.push_back(I.second);
+}
+static void combineWeights(WeightList &Weights) {
+ // Use a hash table for many successors to keep this linear.
+ if (Weights.size() > 128) {
+ combineWeightsByHashing(Weights);
+ return;
+ }
+
+ combineWeightsBySorting(Weights);
+}
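+// Shift right with round-to-nearest; for example, shiftRightAndRound(0b1011,
+// 2) returns (0b1011 >> 2) plus the rounding bit taken from bit 1, i.e. 3.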
+static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
+ assert(Shift >= 0);
+ assert(Shift < 64);
+ if (!Shift)
+ return N;
+ return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
+}
+void Distribution::normalize() {
+ // Early exit for termination nodes.
+ if (Weights.empty())
+ return;
+
+ // Only bother if there are multiple successors.
+ if (Weights.size() > 1)
+ combineWeights(Weights);
+
+ // Early exit when combined into a single successor.
+ if (Weights.size() == 1) {
+ Total = 1;
+ Weights.front().Amount = 1;
+ return;
+ }
+
+ // Determine how much to shift right so that the total fits into 32-bits.
+ //
+ // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
+ // for each weight can cause a 32-bit overflow.
+ int Shift = 0;
+ if (DidOverflow)
+ Shift = 33;
+ else if (Total > UINT32_MAX)
+ Shift = 33 - countLeadingZeros(Total);
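+  // For example, Total == 1 << 40 gives Shift == 33 - 23 == 10, scaling the
+  // total down to about 1 << 30, comfortably below UINT32_MAX.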
+
+ // Early exit if nothing needs to be scaled.
+ if (!Shift) {
+ // If we didn't overflow then combineWeights() shouldn't have changed the
+ // sum of the weights, but let's double-check.
+ assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
+ [](uint64_t Sum, const Weight &W) {
+ return Sum + W.Amount;
+ }) &&
+ "Expected total to be correct");
+ return;
+ }
+
+ // Recompute the total through accumulation (rather than shifting it) so that
+ // it's accurate after shifting and any changes combineWeights() made above.
+ Total = 0;
+
+ // Sum the weights to each node and shift right if necessary.
+ for (Weight &W : Weights) {
+ // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
+ // can round here without concern about overflow.
+ assert(W.TargetNode.isValid());
+ W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
+ assert(W.Amount <= UINT32_MAX);
+
+ // Update the total.
+ Total += W.Amount;
+ }
+ assert(Total <= UINT32_MAX);
+}
+
+void BlockFrequencyInfoImplBase::clear() {
+ // Swap with a default-constructed std::vector, since std::vector<>::clear()
+ // does not actually clear heap storage.
+ std::vector<FrequencyData>().swap(Freqs);
+ std::vector<WorkingData>().swap(Working);
+ Loops.clear();
+}
+
+/// \brief Clear all memory not needed downstream.
+///
+/// Releases all memory not used downstream. In particular, saves Freqs.
+static void cleanup(BlockFrequencyInfoImplBase &BFI) {
+ std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
+ BFI.clear();
+ BFI.Freqs = std::move(SavedFreqs);
+}
+
+bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
+ const LoopData *OuterLoop,
+ const BlockNode &Pred,
+ const BlockNode &Succ,
+ uint64_t Weight) {
+ if (!Weight)
+ Weight = 1;
+
+ auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
+ return OuterLoop && OuterLoop->isHeader(Node);
+ };
+
+ BlockNode Resolved = Working[Succ.Index].getResolvedNode();
+
+#ifndef NDEBUG
+ auto debugSuccessor = [&](const char *Type) {
+ dbgs() << " =>"
+ << " [" << Type << "] weight = " << Weight;
+ if (!isLoopHeader(Resolved))
+ dbgs() << ", succ = " << getBlockName(Succ);
+ if (Resolved != Succ)
+ dbgs() << ", resolved = " << getBlockName(Resolved);
+ dbgs() << "\n";
+ };
+ (void)debugSuccessor;
+#endif
+
+ if (isLoopHeader(Resolved)) {
+ DEBUG(debugSuccessor("backedge"));
+ Dist.addBackedge(Resolved, Weight);
+ return true;
+ }
+
+ if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
+ DEBUG(debugSuccessor(" exit "));
+ Dist.addExit(Resolved, Weight);
+ return true;
+ }
+
+ if (Resolved < Pred) {
+ if (!isLoopHeader(Pred)) {
+ // If OuterLoop is an irreducible loop, we can't actually handle this.
+ assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
+ "unhandled irreducible control flow");
+
+ // Irreducible backedge. Abort.
+ DEBUG(debugSuccessor("abort!!!"));
+ return false;
+ }
+
+ // If "Pred" is a loop header, then this isn't really a backedge; rather,
+ // OuterLoop must be irreducible. These false backedges can come only from
+ // secondary loop headers.
+ assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
+ "unhandled irreducible control flow");
+ }
+
+ DEBUG(debugSuccessor(" local "));
+ Dist.addLocal(Resolved, Weight);
+ return true;
+}
+
+bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
+ const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
+ // Copy the exit map into Dist.
+ for (const auto &I : Loop.Exits)
+ if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
+ I.second.getMass()))
+ // Irreducible backedge.
+ return false;
+
+ return true;
+}
+
+/// \brief Compute the loop scale for a loop.
+void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
+ // Compute loop scale.
+ DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+
+  // Infinite loops need special handling. If we give the back edge an
+  // infinite mass, it may saturate all the other scales in the function down
+ // making all the other region temperatures look exactly the same. Choose an
+ // arbitrary scale to avoid these issues.
+ //
+ // FIXME: An alternate way would be to select a symbolic scale which is later
+ // replaced to be the maximum of all computed scales plus 1. This would
+ // appropriately describe the loop as having a large scale, without skewing
+ // the final frequency computation.
+  const Scaled64 InfiniteLoopScale(1, 12);
+
+ // LoopScale == 1 / ExitMass
+ // ExitMass == HeadMass - BackedgeMass
+ BlockMass TotalBackedgeMass;
+ for (auto &Mass : Loop.BackedgeMass)
+ TotalBackedgeMass += Mass;
+ BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
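+
+  // For example (illustrative): if 3/4 of the header's mass flows along the
+  // backedges, ExitMass is 1/4 and the loop scale becomes 4, i.e. members of
+  // the loop are about four times as hot as the loop's entry.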
+
+ // Block scale stores the inverse of the scale. If this is an infinite loop,
+ // its exit mass will be zero. In this case, use an arbitrary scale for the
+ // loop scale.
+ Loop.Scale =
+      ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
+
+ DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
+ << " - " << TotalBackedgeMass << ")\n"
+ << " - scale = " << Loop.Scale << "\n");
+}
+
+/// \brief Package up a loop.
+void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
+ DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
+
+ // Clear the subloop exits to prevent quadratic memory usage.
+ for (const BlockNode &M : Loop.Nodes) {
+ if (auto *Loop = Working[M.Index].getPackagedLoop())
+ Loop->Exits.clear();
+ DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
+ }
+ Loop.IsPackaged = true;
+}
+
+#ifndef NDEBUG
+static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
+ const DitheringDistributer &D, const BlockNode &T,
+ const BlockMass &M, const char *Desc) {
+ dbgs() << " => assign " << M << " (" << D.RemMass << ")";
+ if (Desc)
+ dbgs() << " [" << Desc << "]";
+ if (T.isValid())
+ dbgs() << " to " << BFI.getBlockName(T);
+ dbgs() << "\n";
+}
+#endif
+
+void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
+ LoopData *OuterLoop,
+ Distribution &Dist) {
+ BlockMass Mass = Working[Source.Index].getMass();
+ DEBUG(dbgs() << " => mass: " << Mass << "\n");
+
+ // Distribute mass to successors as laid out in Dist.
+ DitheringDistributer D(Dist, Mass);
+
+ for (const Weight &W : Dist.Weights) {
+ // Check for a local edge (non-backedge and non-exit).
+ BlockMass Taken = D.takeMass(W.Amount);
+ if (W.Type == Weight::Local) {
+ Working[W.TargetNode.Index].getMass() += Taken;
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ continue;
+ }
+
+ // Backedges and exits only make sense if we're processing a loop.
+ assert(OuterLoop && "backedge or exit outside of loop");
+
+ // Check for a backedge.
+ if (W.Type == Weight::Backedge) {
+ OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
+ continue;
+ }
+
+ // This must be an exit.
+ assert(W.Type == Weight::Exit);
+ OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
+ }
+}
+
+static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
+ const Scaled64 &Min, const Scaled64 &Max) {
+ // Scale the Factor to a size that creates integers. Ideally, integers would
+ // be scaled so that Max == UINT64_MAX so that they can be best
+ // differentiated. However, in the presence of large frequency values, small
+ // frequencies are scaled down to 1, making it impossible to differentiate
+ // small, unequal numbers. When the spread between Min and Max frequencies
+ // fits well within MaxBits, we make the scale be at least 8.
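+  // For example (illustrative): with Min == 1/8 and Max == 4, the spread is
+  // lg(32) == 5 bits, so the factor becomes Min.inverse() << 3 == 64, mapping
+  // the scaled frequencies onto the range [8, 256].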
+ const unsigned MaxBits = 64;
+ const unsigned SpreadBits = (Max / Min).lg();
+ Scaled64 ScalingFactor;
+ if (SpreadBits <= MaxBits - 3) {
+ // If the values are small enough, make the scaling factor at least 8 to
+ // allow distinguishing small values.
+ ScalingFactor = Min.inverse();
+ ScalingFactor <<= 3;
+ } else {
+ // If the values need more than MaxBits to be represented, saturate small
+ // frequency values down to 1 by using a scaling factor that benefits large
+ // frequency values.
+ ScalingFactor = Scaled64(1, MaxBits) / Max;
+ }
+
+ // Translate the floats to integers.
+ DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
+ << ", factor = " << ScalingFactor << "\n");
+ for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
+ Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
+ BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
+ DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
+ << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
+ << ", int = " << BFI.Freqs[Index].Integer << "\n");
+ }
+}
+
+/// \brief Unwrap a loop package.
+///
+/// Visits all the members of a loop, adjusting their BlockData according to
+/// the loop's pseudo-node.
+static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
+ DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
+ << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+ << "\n");
+ Loop.Scale *= Loop.Mass.toScaled();
+ Loop.IsPackaged = false;
+ DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
+
+ // Propagate the head scale through the loop. Since members are visited in
+ // RPO, the head scale will be updated by the loop scale first, and then the
+  // final head scale will be used to update the rest of the members.
+ for (const BlockNode &N : Loop.Nodes) {
+ const auto &Working = BFI.Working[N.Index];
+ Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
+ : BFI.Freqs[N.Index].Scaled;
+ Scaled64 New = Loop.Scale * F;
+ DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
+ << "\n");
+ F = New;
+ }
+}
+
+void BlockFrequencyInfoImplBase::unwrapLoops() {
+ // Set initial frequencies from loop-local masses.
+ for (size_t Index = 0; Index < Working.size(); ++Index)
+ Freqs[Index].Scaled = Working[Index].Mass.toScaled();
+
+ for (LoopData &Loop : Loops)
+ unwrapLoop(*this, Loop);
+}
+
+void BlockFrequencyInfoImplBase::finalizeMetrics() {
+ // Unwrap loop packages in reverse post-order, tracking min and max
+ // frequencies.
+ auto Min = Scaled64::getLargest();
+ auto Max = Scaled64::getZero();
+ for (size_t Index = 0; Index < Working.size(); ++Index) {
+ // Update min/max scale.
+ Min = std::min(Min, Freqs[Index].Scaled);
+ Max = std::max(Max, Freqs[Index].Scaled);
+ }
+
+ // Convert to integers.
+ convertFloatingToInteger(*this, Min, Max);
+
+ // Clean up data structures.
+ cleanup(*this);
+
+ // Print out the final stats.
+ DEBUG(dump());
+}
+
+BlockFrequency
+BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
+ if (!Node.isValid())
+ return 0;
+ return Freqs[Node.Index].Integer;
+}
+Scaled64
+BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
+ if (!Node.isValid())
+ return Scaled64::getZero();
+ return Freqs[Node.Index].Scaled;
+}
+
+void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
+ uint64_t Freq) {
+ assert(Node.isValid() && "Expected valid node");
+ assert(Node.Index < Freqs.size() && "Expected legal index");
+ Freqs[Node.Index].Integer = Freq;
+}
+
+std::string
+BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
+ return std::string();
+}
+std::string
+BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
+ return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
+}
+
+raw_ostream &
+BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
+ const BlockNode &Node) const {
+ return OS << getFloatingBlockFreq(Node);
+}
+
+raw_ostream &
+BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency &Freq) const {
+ Scaled64 Block(Freq.getFrequency(), 0);
+ Scaled64 Entry(getEntryFreq(), 0);
+
+ return OS << Block / Entry;
+}
+
+void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
+ Start = OuterLoop.getHeader();
+ Nodes.reserve(OuterLoop.Nodes.size());
+ for (auto N : OuterLoop.Nodes)
+ addNode(N);
+ indexNodes();
+}
+void IrreducibleGraph::addNodesInFunction() {
+ Start = 0;
+ for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
+ if (!BFI.Working[Index].isPackaged())
+ addNode(Index);
+ indexNodes();
+}
+void IrreducibleGraph::indexNodes() {
+ for (auto &I : Nodes)
+ Lookup[I.Node.Index] = &I;
+}
+void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
+ const BFIBase::LoopData *OuterLoop) {
+ if (OuterLoop && OuterLoop->isHeader(Succ))
+ return;
+ auto L = Lookup.find(Succ.Index);
+ if (L == Lookup.end())
+ return;
+ IrrNode &SuccIrr = *L->second;
+ Irr.Edges.push_back(&SuccIrr);
+ SuccIrr.Edges.push_front(&Irr);
+ ++SuccIrr.NumIn;
+}
+
+namespace llvm {
+template <> struct GraphTraits<IrreducibleGraph> {
+ typedef bfi_detail::IrreducibleGraph GraphT;
+
+ typedef const GraphT::IrrNode NodeType;
+ typedef GraphT::IrrNode::iterator ChildIteratorType;
+
+ static const NodeType *getEntryNode(const GraphT &G) {
+ return G.StartIrr;
+ }
+ static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
+};
+}
+
+/// \brief Find extra irreducible headers.
+///
+/// Find entry blocks and other blocks with backedges, which exist when \c G
+/// contains irreducible sub-SCCs.
+static void findIrreducibleHeaders(
+ const BlockFrequencyInfoImplBase &BFI,
+ const IrreducibleGraph &G,
+ const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
+ LoopData::NodeList &Headers, LoopData::NodeList &Others) {
+ // Map from nodes in the SCC to whether it's an entry block.
+ SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
+
+  // InSCC also acts as the set of nodes in the graph. Seed it.
+ for (const auto *I : SCC)
+ InSCC[I] = false;
+
+ for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
+ auto &Irr = *I->first;
+ for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
+ if (InSCC.count(P))
+ continue;
+
+ // This is an entry block.
+ I->second = true;
+ Headers.push_back(Irr.Node);
+ DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
+ break;
+ }
+ }
+ assert(Headers.size() >= 2 &&
+ "Expected irreducible CFG; -loop-info is likely invalid");
+ if (Headers.size() == InSCC.size()) {
+ // Every block is a header.
+ std::sort(Headers.begin(), Headers.end());
+ return;
+ }
+
+ // Look for extra headers from irreducible sub-SCCs.
+ for (const auto &I : InSCC) {
+ // Entry blocks are already headers.
+ if (I.second)
+ continue;
+
+ auto &Irr = *I.first;
+ for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
+ // Skip forward edges.
+ if (P->Node < Irr.Node)
+ continue;
+
+ // Skip predecessors from entry blocks. These can have inverted
+ // ordering.
+ if (InSCC.lookup(P))
+ continue;
+
+ // Store the extra header.
+ Headers.push_back(Irr.Node);
+ DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
+ break;
+ }
+ if (Headers.back() == Irr.Node)
+ // Added this as a header.
+ continue;
+
+ // This is not a header.
+ Others.push_back(Irr.Node);
+ DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
+ }
+ std::sort(Headers.begin(), Headers.end());
+ std::sort(Others.begin(), Others.end());
+}
+
+static void createIrreducibleLoop(
+ BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
+ LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
+ const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
+ // Translate the SCC into RPO.
+ DEBUG(dbgs() << " - found-scc\n");
+
+ LoopData::NodeList Headers;
+ LoopData::NodeList Others;
+ findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
+
+ auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
+ Headers.end(), Others.begin(), Others.end());
+
+ // Update loop hierarchy.
+ for (const auto &N : Loop->Nodes)
+ if (BFI.Working[N.Index].isLoopHeader())
+ BFI.Working[N.Index].Loop->Parent = &*Loop;
+ else
+ BFI.Working[N.Index].Loop = &*Loop;
+}
+
+iterator_range<std::list<LoopData>::iterator>
+BlockFrequencyInfoImplBase::analyzeIrreducible(
+ const IrreducibleGraph &G, LoopData *OuterLoop,
+ std::list<LoopData>::iterator Insert) {
+ assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
+ auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
+
+ for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
+ if (I->size() < 2)
+ continue;
+
+ // Translate the SCC into RPO.
+ createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
+ }
+
+ if (OuterLoop)
+ return make_range(std::next(Prev), Insert);
+ return make_range(Loops.begin(), Insert);
+}
+
+void
+BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
+ OuterLoop.Exits.clear();
+ for (auto &Mass : OuterLoop.BackedgeMass)
+ Mass = BlockMass::getEmpty();
+ auto O = OuterLoop.Nodes.begin() + 1;
+ for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
+ if (!Working[I->Index].isPackaged())
+ *O++ = *I;
+ OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
+}
+
+void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
+ assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
+
+ // Since the loop has more than one header block, the mass flowing back into
+ // each header will be different. Adjust the mass in each header loop to
+ // reflect the masses flowing through back edges.
+ //
+ // To do this, we distribute the initial mass using the backedge masses
+ // as weights for the distribution.
+ BlockMass LoopMass = BlockMass::getFull();
+ Distribution Dist;
+
+ DEBUG(dbgs() << "adjust-loop-header-mass:\n");
+ for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
+ auto &HeaderNode = Loop.Nodes[H];
+ auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
+ DEBUG(dbgs() << " - Add back edge mass for node "
+ << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
+ if (BackedgeMass.getMass() > 0)
+ Dist.addLocal(HeaderNode, BackedgeMass.getMass());
+ else
+ DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
+ }
+
+ DitheringDistributer D(Dist, LoopMass);
+
+ DEBUG(dbgs() << " Distribute loop mass " << LoopMass
+ << " to headers using above weights\n");
+ for (const Weight &W : Dist.Weights) {
+ BlockMass Taken = D.takeMass(W.Amount);
+ assert(W.Type == Weight::Local && "all weights should be local");
+ Working[W.TargetNode.Index].getMass() = Taken;
+ DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
new file mode 100644
index 0000000..cf0cc8d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -0,0 +1,685 @@
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "branch-prob"
+
+INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
+ "Branch Probability Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
+ "Branch Probability Analysis", false, true)
+
+char BranchProbabilityInfoWrapperPass::ID = 0;
+
+// Weights are for internal use only. They are used by heuristics to help
+// estimate edge probabilities. Example:
+//
+// Using "Loop Branch Heuristics" we predict weights of edges for the
+// block BB2.
+// ...
+// |
+// V
+// BB1<-+
+// | |
+// | | (Weight = 124)
+// V |
+// BB2--+
+// |
+// | (Weight = 4)
+// V
+// BB3
+//
+// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
+// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
+static const uint32_t LBH_TAKEN_WEIGHT = 124;
+static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+/// \brief Unreachable-terminating branch taken weight.
+///
+/// This is the weight for a branch being taken to a block that terminates
+/// (eventually) in unreachable. These are predicted as unlikely as possible.
+static const uint32_t UR_TAKEN_WEIGHT = 1;
+
+/// \brief Unreachable-terminating branch not-taken weight.
+///
+/// This is the weight for a branch not being taken toward a block that
+/// terminates (eventually) in unreachable. Such a branch is essentially never
+/// taken. Set the weight to an absurdly high value so that nested loops don't
+/// easily subsume it.
+static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
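+//
+// With a single unreachable successor, these weights predict the edge into
+// unreachable taken with probability 1 / (1024 * 1024), i.e. roughly one in
+// a million.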
+
+/// \brief Weight for a branch taken going into a cold block.
+///
+/// This is the weight for a branch taken toward a block marked
+/// cold. A block is marked cold if it's postdominated by a
+/// block containing a call to a cold function. Cold functions
+/// are those marked with attribute 'cold'.
+static const uint32_t CC_TAKEN_WEIGHT = 4;
+
+/// \brief Weight for a branch not-taken into a cold block.
+///
+/// This is the weight for a branch not taken toward a block marked
+/// cold.
+static const uint32_t CC_NONTAKEN_WEIGHT = 64;
+
+static const uint32_t PH_TAKEN_WEIGHT = 20;
+static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t ZH_TAKEN_WEIGHT = 20;
+static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t FPH_TAKEN_WEIGHT = 20;
+static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+
+/// \brief Invoke-terminating normal branch taken weight
+///
+/// This is the weight for branching to the normal destination of an invoke
+/// instruction. We expect this to happen most of the time. Set the weight to an
+/// absurdly high value so that nested loops subsume it.
+static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
+
+/// \brief Invoke-terminating normal branch not-taken weight.
+///
+/// This is the weight for branching to the unwind destination of an invoke
+/// instruction. This is essentially never taken.
+static const uint32_t IH_NONTAKEN_WEIGHT = 1;
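+//
+// These weights correspond to taking the normal destination with probability
+// (2^20 - 1) / 2^20, mirroring the unreachable heuristic above.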
+
+/// \brief Calculate edge weights for successors that lead to unreachable.
+///
+/// Predict a successor that necessarily leads to an unreachable-terminated
+/// block as extremely unlikely.
+bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(TI))
+ PostDominatedByUnreachable.insert(BB);
+ return false;
+ }
+
+ SmallVector<unsigned, 4> UnreachableEdges;
+ SmallVector<unsigned, 4> ReachableEdges;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ if (PostDominatedByUnreachable.count(*I))
+ UnreachableEdges.push_back(I.getSuccessorIndex());
+ else
+ ReachableEdges.push_back(I.getSuccessorIndex());
+ }
+
+ // If all successors are in the set of blocks post-dominated by unreachable,
+ // this block is too.
+ if (UnreachableEdges.size() == TI->getNumSuccessors())
+ PostDominatedByUnreachable.insert(BB);
+
+ // Skip probabilities if this block has a single successor or if all were
+ // reachable.
+ if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
+ return false;
+
+  // If the terminator is an InvokeInst, check only the normal destination
+  // block, as the unwind edge of an InvokeInst is also very unlikely to be
+  // taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ if (PostDominatedByUnreachable.count(II->getNormalDest())) {
+ PostDominatedByUnreachable.insert(BB);
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ return false;
+ }
+
+ if (ReachableEdges.empty()) {
+ BranchProbability Prob(1, UnreachableEdges.size());
+ for (unsigned SuccIdx : UnreachableEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
+ return true;
+ }
+
+ BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ UnreachableEdges.size());
+ BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ ReachableEdges.size());
+
+ for (unsigned SuccIdx : UnreachableEdges)
+ setEdgeProbability(BB, SuccIdx, UnreachableProb);
+ for (unsigned SuccIdx : ReachableEdges)
+ setEdgeProbability(BB, SuccIdx, ReachableProb);
+
+ return true;
+}
+
+// Propagate existing explicit probabilities from either profile data or
+// 'expect' intrinsic processing.
+bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ return false;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ return false;
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return false;
+
+ // Check that the number of successors is manageable.
+ assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
+
+ // Ensure there are weights for all of the successors. Note that the first
+ // operand to the metadata node is a name, not a weight.
+ if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
+ return false;
+
+ // Build up the final weights that will be used in a temporary buffer.
+ // Compute the sum of all weights to later decide whether they need to
+ // be scaled to fit in 32 bits.
+ uint64_t WeightSum = 0;
+ SmallVector<uint32_t, 2> Weights;
+ Weights.reserve(TI->getNumSuccessors());
+ for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
+ ConstantInt *Weight =
+ mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
+ if (!Weight)
+ return false;
+ assert(Weight->getValue().getActiveBits() <= 32 &&
+ "Too many bits for uint32_t");
+ Weights.push_back(Weight->getZExtValue());
+ WeightSum += Weights.back();
+ }
+ assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
+
+ // If the sum of weights does not fit in 32 bits, scale every weight down
+ // accordingly.
+ uint64_t ScalingFactor =
+ (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
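+  // For example, WeightSum == 1 << 33 yields ScalingFactor == 3; dividing
+  // every weight by 3 brings the new sum back under UINT32_MAX.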
+
+ WeightSum = 0;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Weights[i] /= ScalingFactor;
+ WeightSum += Weights[i];
+ }
+
+ if (WeightSum == 0) {
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, {1, e});
+ } else {
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
+ }
+
+ assert(WeightSum <= UINT32_MAX &&
+ "Expected weights to scale down to 32 bits");
+
+ return true;
+}
+
+/// \brief Calculate edge weights for edges leading to cold blocks.
+///
+/// A cold block is one post-dominated by a block with a call to a
+/// cold function. Those edges are unlikely to be taken, so we give
+/// them relatively low weight.
+///
+/// Return true if we could compute the weights for cold edges;
+/// return false otherwise.
+bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0)
+ return false;
+
+ // Determine which successors are post-dominated by a cold block.
+ SmallVector<unsigned, 4> ColdEdges;
+ SmallVector<unsigned, 4> NormalEdges;
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ if (PostDominatedByColdCall.count(*I))
+ ColdEdges.push_back(I.getSuccessorIndex());
+ else
+ NormalEdges.push_back(I.getSuccessorIndex());
+
+ // If all successors are in the set of blocks post-dominated by cold calls,
+ // this block is in the set post-dominated by cold calls.
+ if (ColdEdges.size() == TI->getNumSuccessors())
+ PostDominatedByColdCall.insert(BB);
+ else {
+ // Otherwise, if the block itself contains a cold function, add it to the
+ // set of blocks postdominated by a cold call.
+ assert(!PostDominatedByColdCall.count(BB));
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->hasFnAttr(Attribute::Cold)) {
+ PostDominatedByColdCall.insert(BB);
+ break;
+ }
+ }
+
+  // Skip probabilities if this block has a single successor or if none of
+  // its successors are cold.
+ if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
+ return false;
+
+ if (NormalEdges.empty()) {
+ BranchProbability Prob(1, ColdEdges.size());
+ for (unsigned SuccIdx : ColdEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
+ return true;
+ }
+
+ BranchProbability ColdProb(CC_TAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
+ ColdEdges.size());
+ BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
+ NormalEdges.size());
+
+ for (unsigned SuccIdx : ColdEdges)
+ setEdgeProbability(BB, SuccIdx, ColdProb);
+ for (unsigned SuccIdx : NormalEdges)
+ setEdgeProbability(BB, SuccIdx, NormalProb);
+
+ return true;
+}
+
+// Calculate Edge Weights using "Pointer Heuristics". Predict that a
+// comparison between two pointers, or between a pointer and NULL, will fail.
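+// For example (an illustrative IR snippet):
+//   %cmp = icmp eq i32* %p, null
+//   br i1 %cmp, label %iszero, label %nonzero
+// predicts the edge to %nonzero as the likely one, since the equality
+// compare is expected to fail.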
+bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI || !CI->isEquality())
+ return false;
+
+ Value *LHS = CI->getOperand(0);
+
+ if (!LHS->getType()->isPointerTy())
+ return false;
+
+ assert(CI->getOperand(1)->getType()->isPointerTy());
+
+ // p != 0 -> isProb = true
+ // p == 0 -> isProb = false
+ // p != q -> isProb = true
+ // p == q -> isProb = false;
+ unsigned TakenIdx = 0, NonTakenIdx = 1;
+ bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
+ if (!isProb)
+ std::swap(TakenIdx, NonTakenIdx);
+
+ BranchProbability TakenProb(PH_TAKEN_WEIGHT,
+ PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
+ return true;
+}
+
+// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
+// as taken, exiting edges as not-taken.
+bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB,
+ const LoopInfo &LI) {
+ Loop *L = LI.getLoopFor(BB);
+ if (!L)
+ return false;
+
+ SmallVector<unsigned, 8> BackEdges;
+ SmallVector<unsigned, 8> ExitingEdges;
+  SmallVector<unsigned, 8> InEdges; // Edges to non-header blocks in the loop.
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ if (!L->contains(*I))
+ ExitingEdges.push_back(I.getSuccessorIndex());
+ else if (L->getHeader() == *I)
+ BackEdges.push_back(I.getSuccessorIndex());
+ else
+ InEdges.push_back(I.getSuccessorIndex());
+ }
+
+ if (BackEdges.empty() && ExitingEdges.empty())
+ return false;
+
+ // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
+ // normalize them so that they sum up to one.
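+  // As a sketch (assuming LBH_TAKEN_WEIGHT = 124 and LBH_NONTAKEN_WEIGHT = 4,
+  // the values defined earlier in this file): a latch with one backedge and
+  // one exiting edge gets Denom = 128, so the backedge receives probability
+  // 124/128 and the exit edge 4/128.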
+ SmallVector<BranchProbability, 4> Probs(3, BranchProbability::getZero());
+ unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
+ (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
+ if (!BackEdges.empty())
+ Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ if (!InEdges.empty())
+ Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
+ if (!ExitingEdges.empty())
+ Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom);
+
+ if (uint32_t numBackEdges = BackEdges.size()) {
+ auto Prob = Probs[0] / numBackEdges;
+ for (unsigned SuccIdx : BackEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
+ }
+
+ if (uint32_t numInEdges = InEdges.size()) {
+ auto Prob = Probs[1] / numInEdges;
+ for (unsigned SuccIdx : InEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
+ }
+
+ if (uint32_t numExitingEdges = ExitingEdges.size()) {
+ auto Prob = Probs[2] / numExitingEdges;
+ for (unsigned SuccIdx : ExitingEdges)
+ setEdgeProbability(BB, SuccIdx, Prob);
+ }
+
+ return true;
+}
+
+bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI)
+ return false;
+
+ Value *RHS = CI->getOperand(1);
+ ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ if (!CV)
+ return false;
+
+  // If the LHS is the result of AND'ing a value with a single-bit bitmask,
+  // we don't have information about probabilities.
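+  // For example, in
+  //   %and = and i32 %flags, 8
+  //   %cmp = icmp eq i32 %and, 0
+  // the branch tests a single flag bit, which carries no usable bias.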
+ if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
+ if (LHS->getOpcode() == Instruction::And)
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
+ if (AndRHS->getUniqueInteger().isPowerOf2())
+ return false;
+
+ bool isProb;
+ if (CV->isZero()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == 0 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != 0 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SLT:
+ // X < 0 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_SGT:
+ // X > 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
+ } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
+ // InstCombine canonicalizes X <= 0 into X < 1.
+ // X <= 0 -> Unlikely
+ isProb = false;
+ } else if (CV->isAllOnesValue()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == -1 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != -1 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ unsigned TakenIdx = 0, NonTakenIdx = 1;
+
+ if (!isProb)
+ std::swap(TakenIdx, NonTakenIdx);
+
+ BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
+ ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
+ return true;
+}
+
+bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
+ if (!FCmp)
+ return false;
+
+ bool isProb;
+ if (FCmp->isEquality()) {
+ // f1 == f2 -> Unlikely
+ // f1 != f2 -> Likely
+ isProb = !FCmp->isTrueWhenEqual();
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
+ // !isnan -> Likely
+ isProb = true;
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
+ // isnan -> Unlikely
+ isProb = false;
+ } else {
+ return false;
+ }
+
+ unsigned TakenIdx = 0, NonTakenIdx = 1;
+
+ if (!isProb)
+ std::swap(TakenIdx, NonTakenIdx);
+
+ BranchProbability TakenProb(FPH_TAKEN_WEIGHT,
+ FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, TakenIdx, TakenProb);
+ setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
+ return true;
+}
+
+bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
+ InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
+ if (!II)
+ return false;
+
+ BranchProbability TakenProb(IH_TAKEN_WEIGHT,
+ IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
+ setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb);
+ setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl());
+ return true;
+}
+
+void BranchProbabilityInfo::releaseMemory() {
+ Probs.clear();
+}
+
+void BranchProbabilityInfo::print(raw_ostream &OS) const {
+ OS << "---- Branch Probabilities ----\n";
+ // We print the probabilities from the last function the analysis ran over,
+ // or the function it is currently running over.
+ assert(LastF && "Cannot print prior to running over a function");
+ for (const auto &BI : *LastF) {
+ for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE;
+ ++SI) {
+ printEdgeProbability(OS << " ", &BI, *SI);
+ }
+ }
+}
+
+bool BranchProbabilityInfo::
+isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
+}
+
+BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
+ auto MaxProb = BranchProbability::getZero();
+ BasicBlock *MaxSucc = nullptr;
+
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ BasicBlock *Succ = *I;
+ auto Prob = getEdgeProbability(BB, Succ);
+ if (Prob > MaxProb) {
+ MaxProb = Prob;
+ MaxSucc = Succ;
+ }
+ }
+
+ // Hot probability is at least 4/5 = 80%
+ if (MaxProb > BranchProbability(4, 5))
+ return MaxSucc;
+
+ return nullptr;
+}
+
+/// Get the raw edge probability for the edge. If we can't find it, return a
+/// default probability of 1/N, where N is the number of successors. Here an
+/// edge is specified by PredBlock and an index into its successors.
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ unsigned IndexInSuccessors) const {
+ auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
+
+ if (I != Probs.end())
+ return I->second;
+
+ return {1,
+ static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))};
+}
+
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ succ_const_iterator Dst) const {
+ return getEdgeProbability(Src, Dst.getSuccessorIndex());
+}
+
+/// Get the raw edge probability calculated for the block pair. This returns the
+/// sum of all raw edge probabilities from Src to Dst.
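+/// For example, a switch that sends two of its cases to the same destination
+/// block reports the sum of those two case probabilities for that block pair.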
+BranchProbability
+BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
+ const BasicBlock *Dst) const {
+ auto Prob = BranchProbability::getZero();
+ bool FoundProb = false;
+ for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
+ if (*I == Dst) {
+ auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
+ if (MapI != Probs.end()) {
+ FoundProb = true;
+ Prob += MapI->second;
+ }
+ }
+ uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
+ return FoundProb ? Prob : BranchProbability(1, succ_num);
+}
+
+/// Set the edge probability for a given edge specified by PredBlock and an
+/// index to the successors.
+void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
+ unsigned IndexInSuccessors,
+ BranchProbability Prob) {
+ Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
+ DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors
+ << " successor probability to " << Prob << "\n");
+}
+
+raw_ostream &
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
+ const BasicBlock *Src,
+ const BasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge " << Src->getName() << " -> " << Dst->getName()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
+
+void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) {
+ DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
+ << " ----\n\n");
+ LastF = &F; // Store the last function we ran on for printing.
+ assert(PostDominatedByUnreachable.empty());
+ assert(PostDominatedByColdCall.empty());
+
+ // Walk the basic blocks in post-order so that we can build up state about
+ // the successors of a block iteratively.
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
+ continue;
+ if (calcMetadataWeights(BB))
+ continue;
+ if (calcColdCallHeuristics(BB))
+ continue;
+ if (calcLoopBranchHeuristics(BB, LI))
+ continue;
+ if (calcPointerHeuristics(BB))
+ continue;
+ if (calcZeroHeuristics(BB))
+ continue;
+ if (calcFloatingPointHeuristics(BB))
+ continue;
+ calcInvokeHeuristics(BB);
+ }
+
+ PostDominatedByUnreachable.clear();
+ PostDominatedByColdCall.clear();
+}
+
+void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
+ const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.calculate(F, LI);
+ return false;
+}
+
+void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }
+
+void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
+ const Module *) const {
+ BPI.print(OS);
+}
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
new file mode 100644
index 0000000..0dfd57d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -0,0 +1,236 @@
+//===-- CFG.cpp - BasicBlock analysis --------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs analyses on basic blocks, and instructions
+// contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFG.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+
+using namespace llvm;
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
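+/// For example, a single natural loop with one latch yields exactly one
+/// pair: <latch, header>.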
+void llvm::FindFunctionBackedges(const Function &F,
+ SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+ const BasicBlock *BB = &F.getEntryBlock();
+ if (succ_empty(BB))
+ return;
+
+ SmallPtrSet<const BasicBlock*, 8> Visited;
+ SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+ SmallPtrSet<const BasicBlock*, 8> InStack;
+
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+ const BasicBlock *ParentBB = Top.first;
+ succ_const_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB).second) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ Result.push_back(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+      // Go down one level if there is an unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ InStack.erase(VisitStack.pop_back_val().first);
+ }
+ } while (!VisitStack.empty());
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
+ const BasicBlock *Succ) {
+ const TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+ unsigned e = Term->getNumSuccessors();
+#endif
+ for (unsigned i = 0; ; ++i) {
+ assert(i != e && "Didn't find edge?");
+ if (Term->getSuccessor(i) == Succ)
+ return i;
+ }
+}
+
+/// isCriticalEdge - Return true if the specified edge is a critical edge.
+/// Critical edges are edges from a block with multiple successors to a block
+/// with multiple predecessors.
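+///
+/// For example, if a block ending in
+///   br i1 %c, label %a, label %b
+/// branches to a block %b that has another predecessor as well, the edge to
+/// %b is critical; it must be split before new code can be placed on it.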
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+ bool AllowIdenticalEdges) {
+ assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ if (TI->getNumSuccessors() == 1) return false;
+
+ const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+ // If there is more than one predecessor, this is a critical edge...
+ assert(I != E && "No preds, but we have an edge to the block?");
+ const BasicBlock *FirstPred = *I;
+ ++I; // Skip one edge due to the incoming arc from TI.
+ if (!AllowIdenticalEdges)
+ return I != E;
+
+ // If AllowIdenticalEdges is true, then we allow this edge to be considered
+ // non-critical iff all preds come from TI's block.
+ for (; I != E; ++I)
+ if (*I != FirstPred)
+ return true;
+ return false;
+}
+
+// LoopInfo contains a mapping from basic block to the innermost loop. Find
+// the outermost loop in the loop nest that contains BB.
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
+ const Loop *L = LI->getLoopFor(BB);
+ if (L) {
+ while (const Loop *Parent = L->getParentLoop())
+ L = Parent;
+ }
+ return L;
+}
+
+// True if there is a loop which contains both BB1 and BB2.
+static bool loopContainsBoth(const LoopInfo *LI,
+ const BasicBlock *BB1, const BasicBlock *BB2) {
+ const Loop *L1 = getOutermostLoop(LI, BB1);
+ const Loop *L2 = getOutermostLoop(LI, BB2);
+ return L1 != nullptr && L1 == L2;
+}
+
+bool llvm::isPotentiallyReachableFromMany(
+ SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
+ const DominatorTree *DT, const LoopInfo *LI) {
+  // When the stop block is unreachable, it is vacuously dominated by every
+  // block, regardless of whether there's a path between the two blocks, so
+  // the dominator tree tells us nothing useful here; ignore it.
+ if (DT && !DT->isReachableFromEntry(StopBB))
+ DT = nullptr;
+
+ // Limit the number of blocks we visit. The goal is to avoid run-away compile
+ // times on large CFGs without hampering sensible code. Arbitrarily chosen.
+ unsigned Limit = 32;
+ SmallSet<const BasicBlock*, 64> Visited;
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (!Visited.insert(BB).second)
+ continue;
+ if (BB == StopBB)
+ return true;
+ if (DT && DT->dominates(BB, StopBB))
+ return true;
+ if (LI && loopContainsBoth(LI, BB, StopBB))
+ return true;
+
+ if (!--Limit) {
+ // We haven't been able to prove it one way or the other. Conservatively
+ // answer true -- that there is potentially a path.
+ return true;
+ }
+
+ if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) {
+ // All blocks in a single loop are reachable from all other blocks. From
+ // any of these blocks, we can skip directly to the exits of the loop,
+ // ignoring any other blocks inside the loop body.
+ Outer->getExitBlocks(Worklist);
+ } else {
+ Worklist.append(succ_begin(BB), succ_end(BB));
+ }
+ } while (!Worklist.empty());
+
+  // We have exhausted all possible paths and are certain that StopBB cannot
+  // be reached from any of the blocks we started from.
+ return false;
+}
+
+bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent() == B->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(const_cast<BasicBlock*>(A));
+
+ return isPotentiallyReachableFromMany(Worklist, const_cast<BasicBlock *>(B),
+ DT, LI);
+}
+
+bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent()->getParent() == B->getParent()->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+
+ if (A->getParent() == B->getParent()) {
+ // The same block case is special because it's the only time we're looking
+ // within a single block to see which instruction comes first. Once we
+ // start looking at multiple blocks, the first instruction of the block is
+ // reachable, so we only need to determine reachability between whole
+ // blocks.
+ BasicBlock *BB = const_cast<BasicBlock *>(A->getParent());
+
+ // If the block is in a loop then we can reach any instruction in the block
+ // from any other instruction in the block by going around a backedge.
+ if (LI && LI->getLoopFor(BB) != nullptr)
+ return true;
+
+ // Linear scan, start at 'A', see whether we hit 'B' or the end first.
+ for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E;
+ ++I) {
+ if (&*I == B)
+ return true;
+ }
+
+ // Can't be in a loop if it's the entry block -- the entry block may not
+ // have predecessors.
+ if (BB == &BB->getParent()->getEntryBlock())
+ return false;
+
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ Worklist.append(succ_begin(BB), succ_end(BB));
+
+ if (Worklist.empty()) {
+ // We've proven that there's no path!
+ return false;
+ }
+ } else {
+ Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
+ }
+
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return false;
+
+ return isPotentiallyReachableFromMany(
+ Worklist, const_cast<BasicBlock *>(B->getParent()), DT, LI);
+}
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 0000000..c86f1f5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,165 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
+using namespace llvm;
+
+namespace {
+ struct CFGViewer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+ CFGViewer() : FunctionPass(ID) {
+      initializeCFGViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ F.viewCFG();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGViewer::ID = 0;
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+
+namespace {
+ struct CFGOnlyViewer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+ CFGOnlyViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ F.viewCFGOnly();
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyViewer::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true)
+
+namespace {
+ struct CFGPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGPrinter() : FunctionPass(ID) {
+ initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGPrinter::ID = 0;
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true)
+
+namespace {
+ struct CFGOnlyPrinter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGOnlyPrinter() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
+ WriteGraph(File, (const Function*)&F, true);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+ return false;
+ }
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char CFGOnlyPrinter::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true)
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
+///
+void Function::viewCFG() const {
+ ViewGraph(this, "cfg" + getName());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG
+/// this can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+ ViewGraph(this, "cfg" + getName(), true);
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+ return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+ return new CFGOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
new file mode 100644
index 0000000..4843ed6
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
@@ -0,0 +1,1119 @@
+//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a CFL-based context-insensitive alias analysis
+// algorithm. It does not depend on types. The algorithm is a mixture of the one
+// described in "Demand-driven alias analysis for C" by Xin Zheng and Radu
+// Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to
+// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the
+// papers, we build a graph of the uses of a variable, where each node is a
+// memory location, and each edge is an action that happened on that memory
+// location. The "actions" can be one of Dereference, Reference, or Assign.
+//
+// Two variables are considered as aliasing iff you can reach one value's node
+// from the other value's node and the language formed by concatenating all of
+// the edge labels (actions) conforms to a context-free grammar.
+//
+// Because this algorithm requires a graph search on each query, we execute the
+// algorithm outlined in "Fast algorithms..." (mentioned above)
+// in order to transform the graph into sets of variables that may alias in
+// ~nlogn time (n = number of variables), which makes queries take constant
+// time.
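+//
+// As an illustrative sketch, for the IR
+//   store i32* %p, i32** %a
+//   %v = load i32** %a
+// the store adds a Dereference edge from %a to %p and the load adds a
+// Reference edge from %v to %a; chasing those edges relates %v and %p
+// through %a, so the analysis conservatively reports that they may alias.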
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "StratifiedSets.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfl-aa"
+
+CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {}
+CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
+
+// \brief Information we have about a function and would like to keep around
+struct CFLAAResult::FunctionInfo {
+ StratifiedSets<Value *> Sets;
+ // Lots of functions have < 4 returns. Adjust as necessary.
+ SmallVector<Value *, 4> ReturnedValues;
+
+ FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+};
+
+// Try to go from a Value* to a Function*. Returns None on failure; when it
+// succeeds, the returned Function* is never null.
+static Optional<Function *> parentFunctionOfValue(Value *);
+
+// Returns possible functions called by the Inst* into the given
+// SmallVectorImpl. Returns true if targets found, false otherwise.
+// This is templated because InvokeInst/CallInst give us the same
+// set of functions that we care about, and I don't like repeating
+// myself.
+template <typename Inst>
+static bool getPossibleTargets(Inst *, SmallVectorImpl<Function *> &);
+
+// Some instructions need to have their users tracked. Instructions like
+// `add` require you to get the users of the Instruction* itself, other
+// instructions like `store` require you to get the users of the first
+// operand. This function gets the "proper" value to track for each
+// type of instruction we support.
+static Optional<Value *> getTargetValue(Instruction *);
+
+// There are certain instructions (i.e. FenceInst, etc.) that we ignore.
+// This notes that we should ignore those.
+static bool hasUsefulEdges(Instruction *);
+
+const StratifiedIndex StratifiedLink::SetSentinel =
+ std::numeric_limits<StratifiedIndex>::max();
+
+namespace {
+// StratifiedInfo Attribute things.
+typedef unsigned StratifiedAttr;
+LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
+LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
+LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
+LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3;
+LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
+LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
+
+LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex;
+LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
+
+// \brief StratifiedSets call for knowledge of "direction", so this is how we
+// represent that locally.
+enum class Level { Same, Above, Below };
+
+// \brief Edges can be one of three "weights" -- each weight must have an
+// inverse weight (Assign has Assign; Reference has Dereference).
+enum class EdgeType {
+ // The weight assigned when assigning from or to a value. For example, in:
+ // %b = getelementptr %a, 0
+ // ...The relationships are %b assign %a, and %a assign %b. This used to be
+ // two edges, but having a distinction bought us nothing.
+ Assign,
+
+ // The edge used when we have an edge going from some handle to a Value.
+ // Examples of this include:
+ // %b = load %a (%b Dereference %a)
+ // %b = extractelement %a, 0 (%a Dereference %b)
+ Dereference,
+
+ // The edge used when our edge goes from a value to a handle that may have
+ // contained it at some point. Examples:
+ // %b = load %a (%a Reference %b)
+ // %b = extractelement %a, 0 (%b Reference %a)
+ Reference
+};
+
+// \brief Encodes the notion of a "use"
+struct Edge {
+ // \brief Which value the edge is coming from
+ Value *From;
+
+ // \brief Which value the edge is pointing to
+ Value *To;
+
+ // \brief Edge weight
+ EdgeType Weight;
+
+ // \brief Whether we aliased any external values along the way that may be
+ // invisible to the analysis (i.e. landingpad for exceptions, calls for
+ // interprocedural analysis, etc.)
+ StratifiedAttrs AdditionalAttrs;
+
+ Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A)
+ : From(From), To(To), Weight(W), AdditionalAttrs(A) {}
+};
+
+// \brief Gets the edges our graph should have, based on an Instruction*
+class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
+ CFLAAResult &AA;
+ SmallVectorImpl<Edge> &Output;
+
+public:
+ GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl<Edge> &Output)
+ : AA(AA), Output(Output) {}
+
+ void visitInstruction(Instruction &) {
+ llvm_unreachable("Unsupported instruction encountered");
+ }
+
+ void visitPtrToIntInst(PtrToIntInst &Inst) {
+ auto *Ptr = Inst.getOperand(0);
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &Inst) {
+ auto *Ptr = &Inst;
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
+ void visitCastInst(CastInst &Inst) {
+ Output.push_back(
+ Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone));
+ }
+
+ void visitBinaryOperator(BinaryOperator &Inst) {
+ auto *Op1 = Inst.getOperand(0);
+ auto *Op2 = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Op1, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Op2, EdgeType::Assign, AttrNone));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getNewValOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getValOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitPHINode(PHINode &Inst) {
+ for (Value *Val : Inst.incoming_values()) {
+ Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone));
+ }
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &Inst) {
+ auto *Op = Inst.getPointerOperand();
+ Output.push_back(Edge(&Inst, Op, EdgeType::Assign, AttrNone));
+ for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I)
+ Output.push_back(Edge(&Inst, *I, EdgeType::Assign, AttrNone));
+ }
+
+ void visitSelectInst(SelectInst &Inst) {
+ // Condition is not processed here (The actual statement producing
+ // the condition result is processed elsewhere). For select, the
+ // condition is evaluated, but not loaded, stored, or assigned
+ // simply as a result of being the condition of a select.
+
+ auto *TrueVal = Inst.getTrueValue();
+ Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
+ auto *FalseVal = Inst.getFalseValue();
+ Output.push_back(Edge(&Inst, FalseVal, EdgeType::Assign, AttrNone));
+ }
+
+ void visitAllocaInst(AllocaInst &) {}
+
+ void visitLoadInst(LoadInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitStoreInst(StoreInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getValueOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitVAArgInst(VAArgInst &Inst) {
+ // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it does
+ // two things:
+ // 1. Loads a value from *((T*)*Ptr).
+ // 2. Increments (stores to) *Ptr by some target-specific amount.
+ // For now, we'll handle this like a landingpad instruction (by placing the
+ // result in its own group, and having that group alias externals).
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Val, EdgeType::Assign, AttrAll));
+ }
+
+ static bool isFunctionExternal(Function *Fn) {
+ return Fn->isDeclaration() || !Fn->hasLocalLinkage();
+ }
+
+  // Gets whether the set at Index1 is above, below, or equal to the set at
+  // Index2. Returns None if they are not in the same set chain.
+ static Optional<Level> getIndexRelation(const StratifiedSets<Value *> &Sets,
+ StratifiedIndex Index1,
+ StratifiedIndex Index2) {
+ if (Index1 == Index2)
+ return Level::Same;
+
+ const auto *Current = &Sets.getLink(Index1);
+ while (Current->hasBelow()) {
+ if (Current->Below == Index2)
+ return Level::Below;
+ Current = &Sets.getLink(Current->Below);
+ }
+
+ Current = &Sets.getLink(Index1);
+ while (Current->hasAbove()) {
+ if (Current->Above == Index2)
+ return Level::Above;
+ Current = &Sets.getLink(Current->Above);
+ }
+
+ return NoneType();
+ }
+
+ bool
+ tryInterproceduralAnalysis(const SmallVectorImpl<Function *> &Fns,
+ Value *FuncValue,
+ const iterator_range<User::op_iterator> &Args) {
+ const unsigned ExpectedMaxArgs = 8;
+ const unsigned MaxSupportedArgs = 50;
+ assert(Fns.size() > 0);
+
+ // I put this here to give us an upper bound on time taken by IPA. Is it
+ // really (realistically) needed? Keep in mind that we do have an n^2 algo.
+ if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs)
+ return false;
+
+ // Exit early if we'll fail anyway
+ for (auto *Fn : Fns) {
+ if (isFunctionExternal(Fn) || Fn->isVarArg())
+ return false;
+ auto &MaybeInfo = AA.ensureCached(Fn);
+ if (!MaybeInfo.hasValue())
+ return false;
+ }
+
+ SmallVector<Value *, ExpectedMaxArgs> Arguments(Args.begin(), Args.end());
+ SmallVector<StratifiedInfo, ExpectedMaxArgs> Parameters;
+ for (auto *Fn : Fns) {
+ auto &Info = *AA.ensureCached(Fn);
+ auto &Sets = Info.Sets;
+ auto &RetVals = Info.ReturnedValues;
+
+ Parameters.clear();
+ for (auto &Param : Fn->args()) {
+ auto MaybeInfo = Sets.find(&Param);
+ // Did a new parameter somehow get added to the function/slip by?
+ if (!MaybeInfo.hasValue())
+ return false;
+ Parameters.push_back(*MaybeInfo);
+ }
+
+ // Adding an edge from argument -> return value for each parameter that
+ // may alias the return value
+ for (unsigned I = 0, E = Parameters.size(); I != E; ++I) {
+ auto &ParamInfo = Parameters[I];
+ auto &ArgVal = Arguments[I];
+ bool AddEdge = false;
+ StratifiedAttrs Externals;
+ for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) {
+ auto MaybeInfo = Sets.find(RetVals[X]);
+ if (!MaybeInfo.hasValue())
+ return false;
+
+ auto &RetInfo = *MaybeInfo;
+ auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs;
+ auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs;
+ auto MaybeRelation =
+ getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index);
+ if (MaybeRelation.hasValue()) {
+ AddEdge = true;
+ Externals |= RetAttrs | ParamAttrs;
+ }
+ }
+ if (AddEdge)
+ Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
+ StratifiedAttrs().flip()));
+ }
+
+ if (Parameters.size() != Arguments.size())
+ return false;
+
+ // Adding edges between arguments for arguments that may end up aliasing
+ // each other. This is necessary for functions such as
+ // void foo(int** a, int** b) { *a = *b; }
+ // (Technically, the proper sets for this would be those below
+ // Arguments[I] and Arguments[X], but our algorithm will produce
+ // extremely similar, and equally correct, results either way)
+ for (unsigned I = 0, E = Arguments.size(); I != E; ++I) {
+ auto &MainVal = Arguments[I];
+ auto &MainInfo = Parameters[I];
+ auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs;
+ for (unsigned X = I + 1; X != E; ++X) {
+ auto &SubInfo = Parameters[X];
+ auto &SubVal = Arguments[X];
+ auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs;
+ auto MaybeRelation =
+ getIndexRelation(Sets, MainInfo.Index, SubInfo.Index);
+
+ if (!MaybeRelation.hasValue())
+ continue;
+
+ auto NewAttrs = SubAttrs | MainAttrs;
+ Output.push_back(Edge(MainVal, SubVal, EdgeType::Assign, NewAttrs));
+ }
+ }
+ }
+ return true;
+ }
+
+ template <typename InstT> void visitCallLikeInst(InstT &Inst) {
+ // TODO: Add support for noalias args/all the other fun function attributes
+ // that we can tack on.
+ SmallVector<Function *, 4> Targets;
+ if (getPossibleTargets(&Inst, Targets)) {
+ if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands()))
+ return;
+ // Cleanup from interprocedural analysis
+ Output.clear();
+ }
+
+ // Because the function is opaque, we need to note that anything
+ // could have happened to the arguments, and that the result could alias
+ // just about anything, too.
+ // The goal of the loop is in part to unify many Values into one set, so we
+ // don't care if the function is void there.
+ for (Value *V : Inst.arg_operands())
+ Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll));
+ if (Inst.getNumArgOperands() == 0 &&
+ Inst.getType() != Type::getVoidTy(Inst.getContext()))
+ Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));
+ }
+
+ void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); }
+
+ void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); }
+
+ // Because vectors/aggregates are immutable and unaddressable,
+ // there's nothing we can do to coax a value out of them, other
+ // than calling Extract{Element,Value}. We can effectively treat
+ // them as pointers to arbitrary memory locations we can store in
+ // and load from.
+ void visitExtractElementInst(ExtractElementInst &Inst) {
+ auto *Ptr = Inst.getVectorOperand();
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitInsertElementInst(InsertElementInst &Inst) {
+ auto *Vec = Inst.getOperand(0);
+ auto *Val = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Vec, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitLandingPadInst(LandingPadInst &Inst) {
+ // Exceptions come from "nowhere", from our analysis' perspective.
+    // So we place the instruction in its own group, noting that said group
+    // may alias externals.
+ Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));
+ }
+
+ void visitInsertValueInst(InsertValueInst &Inst) {
+ auto *Agg = Inst.getOperand(0);
+ auto *Val = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Agg, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitExtractValueInst(ExtractValueInst &Inst) {
+ auto *Ptr = Inst.getAggregateOperand();
+ Output.push_back(Edge(&Inst, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
+ auto *From1 = Inst.getOperand(0);
+ auto *From2 = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, From1, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, From2, EdgeType::Assign, AttrNone));
+ }
+
+ void visitConstantExpr(ConstantExpr *CE) {
+ switch (CE->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown instruction type encountered!");
+// Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ visit##OPCODE(*(CLASS *)CE); \
+ break;
+#include "llvm/IR/Instruction.def"
+ }
+ }
+};
+
+// For a given instruction, we need to know which Value* to get the
+// users of in order to build our graph. In some cases (i.e. add),
+// we simply need the Instruction*. In other cases (i.e. store),
+// finding the users of the Instruction* is useless; we need to find
+// the users of the first operand. This handles determining which
+// value to follow for us.
+//
+// Note: we *need* to keep this in sync with GetEdgesVisitor. Add
+// something to GetEdgesVisitor, add it here -- remove something from
+// GetEdgesVisitor, remove it here.
+class GetTargetValueVisitor
+ : public InstVisitor<GetTargetValueVisitor, Value *> {
+public:
+ Value *visitInstruction(Instruction &Inst) { return &Inst; }
+
+ Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); }
+
+ Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+ return Inst.getPointerOperand();
+ }
+
+ Value *visitAtomicRMWInst(AtomicRMWInst &Inst) {
+ return Inst.getPointerOperand();
+ }
+
+ Value *visitInsertElementInst(InsertElementInst &Inst) {
+ return Inst.getOperand(0);
+ }
+
+ Value *visitInsertValueInst(InsertValueInst &Inst) {
+ return Inst.getAggregateOperand();
+ }
+};
+
+// Set building requires a weighted bidirectional graph.
+template <typename EdgeTypeT> class WeightedBidirectionalGraph {
+public:
+ typedef std::size_t Node;
+
+private:
+ const static Node StartNode = Node(0);
+
+ struct Edge {
+ EdgeTypeT Weight;
+ Node Other;
+
+ Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {}
+
+ bool operator==(const Edge &E) const {
+ return Weight == E.Weight && Other == E.Other;
+ }
+
+ bool operator!=(const Edge &E) const { return !operator==(E); }
+ };
+
+ struct NodeImpl {
+ std::vector<Edge> Edges;
+ };
+
+ std::vector<NodeImpl> NodeImpls;
+
+ bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); }
+
+ const NodeImpl &getNode(Node N) const { return NodeImpls[N]; }
+ NodeImpl &getNode(Node N) { return NodeImpls[N]; }
+
+public:
+ // ----- Various Edge iterators for the graph ----- //
+
+ // \brief Iterator for edges. Because this graph is bidirected, we don't
+ // allow modification of the edges using this iterator. Additionally, the
+ // iterator becomes invalid if you add edges to or from the node you're
+ // getting the edges of.
+  struct EdgeIterator : public std::iterator<std::forward_iterator_tag,
+                                             std::tuple<EdgeTypeT, Node>> {
+ EdgeIterator(const typename std::vector<Edge>::const_iterator &Iter)
+ : Current(Iter) {}
+
+    EdgeIterator(NodeImpl &Impl) : Current(Impl.Edges.begin()) {}
+
+ EdgeIterator &operator++() {
+ ++Current;
+ return *this;
+ }
+
+ EdgeIterator operator++(int) {
+ EdgeIterator Copy(Current);
+ operator++();
+ return Copy;
+ }
+
+ std::tuple<EdgeTypeT, Node> &operator*() {
+ Store = std::make_tuple(Current->Weight, Current->Other);
+ return Store;
+ }
+
+ bool operator==(const EdgeIterator &Other) const {
+ return Current == Other.Current;
+ }
+
+ bool operator!=(const EdgeIterator &Other) const {
+ return !operator==(Other);
+ }
+
+ private:
+ typename std::vector<Edge>::const_iterator Current;
+ std::tuple<EdgeTypeT, Node> Store;
+ };
+
+ // Wrapper for EdgeIterator with begin()/end() calls.
+ struct EdgeIterable {
+ EdgeIterable(const std::vector<Edge> &Edges)
+ : BeginIter(Edges.begin()), EndIter(Edges.end()) {}
+
+ EdgeIterator begin() { return EdgeIterator(BeginIter); }
+
+ EdgeIterator end() { return EdgeIterator(EndIter); }
+
+ private:
+ typename std::vector<Edge>::const_iterator BeginIter;
+ typename std::vector<Edge>::const_iterator EndIter;
+ };
+
+ // ----- Actual graph-related things ----- //
+
+ WeightedBidirectionalGraph() {}
+
+ WeightedBidirectionalGraph(WeightedBidirectionalGraph<EdgeTypeT> &&Other)
+ : NodeImpls(std::move(Other.NodeImpls)) {}
+
+ WeightedBidirectionalGraph<EdgeTypeT> &
+ operator=(WeightedBidirectionalGraph<EdgeTypeT> &&Other) {
+ NodeImpls = std::move(Other.NodeImpls);
+ return *this;
+ }
+
+ Node addNode() {
+ auto Index = NodeImpls.size();
+ auto NewNode = Node(Index);
+ NodeImpls.push_back(NodeImpl());
+ return NewNode;
+ }
+
+ void addEdge(Node From, Node To, const EdgeTypeT &Weight,
+ const EdgeTypeT &ReverseWeight) {
+ assert(inbounds(From));
+ assert(inbounds(To));
+ auto &FromNode = getNode(From);
+ auto &ToNode = getNode(To);
+ FromNode.Edges.push_back(Edge(Weight, To));
+ ToNode.Edges.push_back(Edge(ReverseWeight, From));
+ }
+
+ EdgeIterable edgesFor(const Node &N) const {
+ const auto &Node = getNode(N);
+ return EdgeIterable(Node.Edges);
+ }
+
+ bool empty() const { return NodeImpls.empty(); }
+ std::size_t size() const { return NodeImpls.size(); }
+
+ // \brief Gets an arbitrary node in the graph as a starting point for
+ // traversal.
+ Node getEntryNode() {
+ assert(inbounds(StartNode));
+ return StartNode;
+ }
+};
+
+typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
+typedef DenseMap<Value *, GraphT::Node> NodeMapT;
+}
+
+//===----------------------------------------------------------------------===//
+// Function declarations that require types defined in the namespace above
+//===----------------------------------------------------------------------===//
+
+// Given an argument number, returns the appropriate Attr index to set.
+static StratifiedAttr argNumberToAttrIndex(StratifiedAttr);
+
+// Given a Value, potentially return which AttrIndex it maps to.
+static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);
+
+// Gets the inverse of a given EdgeType.
+static EdgeType flipWeight(EdgeType);
+
+// Gets edges of the given Instruction*, writing them to the SmallVector*.
+static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl<Edge> &);
+
+// Gets edges of the given ConstantExpr*, writing them to the SmallVector*.
+static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl<Edge> &);
+
+// Gets the "Level" that one should travel in StratifiedSets
+// given an EdgeType.
+static Level directionOfEdgeType(EdgeType);
+
+// Builds the graph needed for constructing the StratifiedSets for the
+// given function
+static void buildGraphFrom(CFLAAResult &, Function *,
+ SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
+
+// Gets the edges of a ConstantExpr as if it was an Instruction. This
+// function also acts on any nested ConstantExprs, adding the edges
+// of those to the given SmallVector as well.
+static void constexprToEdges(CFLAAResult &, ConstantExpr &,
+ SmallVectorImpl<Edge> &);
+
+// Given an Instruction, this will add it to the graph, along with any
+// Instructions that are potentially only available from said Instruction
+// For example, given the following line:
+// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
+// addInstructionToGraph would add both the `load` and `getelementptr`
+// instructions to the graph appropriately.
+static void addInstructionToGraph(CFLAAResult &, Instruction &,
+ SmallVectorImpl<Value *> &, NodeMapT &,
+ GraphT &);
+
+// Notes whether it would be pointless to add the given Value to our sets.
+static bool canSkipAddingToSets(Value *Val);
+
+static Optional<Function *> parentFunctionOfValue(Value *Val) {
+ if (auto *Inst = dyn_cast<Instruction>(Val)) {
+ auto *Bb = Inst->getParent();
+ return Bb->getParent();
+ }
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ return Arg->getParent();
+ return NoneType();
+}
+
+template <typename Inst>
+static bool getPossibleTargets(Inst *Call,
+ SmallVectorImpl<Function *> &Output) {
+ if (auto *Fn = Call->getCalledFunction()) {
+ Output.push_back(Fn);
+ return true;
+ }
+
+ // TODO: If the call is indirect, we might be able to enumerate all potential
+ // targets of the call and return them, rather than just failing.
+ return false;
+}
+
+static Optional<Value *> getTargetValue(Instruction *Inst) {
+ GetTargetValueVisitor V;
+ return V.visit(Inst);
+}
+
+static bool hasUsefulEdges(Instruction *Inst) {
+ bool IsNonInvokeTerminator =
+ isa<TerminatorInst>(Inst) && !isa<InvokeInst>(Inst);
+ return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeTerminator;
+}
+
+static bool hasUsefulEdges(ConstantExpr *CE) {
+  // ConstantExpr doesn't have terminators, invokes, or fences, so we only
+  // need to check for compares.
+ return CE->getOpcode() != Instruction::ICmp &&
+ CE->getOpcode() != Instruction::FCmp;
+}
+
+static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
+ if (isa<GlobalValue>(Val))
+ return AttrGlobalIndex;
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ // Only pointer arguments should have the argument attribute,
+ // because things can't escape through scalars without us seeing a
+ // cast, and thus, interaction with them doesn't matter.
+ if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
+ return argNumberToAttrIndex(Arg->getArgNo());
+ return NoneType();
+}
+
+static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
+ if (ArgNum >= AttrMaxNumArgs)
+ return AttrAllIndex;
+ return ArgNum + AttrFirstArgIndex;
+}
+
+static EdgeType flipWeight(EdgeType Initial) {
+ switch (Initial) {
+ case EdgeType::Assign:
+ return EdgeType::Assign;
+ case EdgeType::Dereference:
+ return EdgeType::Reference;
+ case EdgeType::Reference:
+ return EdgeType::Dereference;
+ }
+ llvm_unreachable("Incomplete coverage of EdgeType enum");
+}
+
+static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst,
+ SmallVectorImpl<Edge> &Output) {
+ assert(hasUsefulEdges(Inst) &&
+ "Expected instructions to have 'useful' edges");
+ GetEdgesVisitor v(Analysis, Output);
+ v.visit(Inst);
+}
+
+static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE,
+ SmallVectorImpl<Edge> &Output) {
+ assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges");
+ GetEdgesVisitor v(Analysis, Output);
+ v.visitConstantExpr(CE);
+}
+
+static Level directionOfEdgeType(EdgeType Weight) {
+ switch (Weight) {
+ case EdgeType::Reference:
+ return Level::Above;
+ case EdgeType::Dereference:
+ return Level::Below;
+ case EdgeType::Assign:
+ return Level::Same;
+ }
+ llvm_unreachable("Incomplete switch coverage");
+}
+
+static void constexprToEdges(CFLAAResult &Analysis,
+ ConstantExpr &CExprToCollapse,
+ SmallVectorImpl<Edge> &Results) {
+ SmallVector<ConstantExpr *, 4> Worklist;
+ Worklist.push_back(&CExprToCollapse);
+
+ SmallVector<Edge, 8> ConstexprEdges;
+ SmallPtrSet<ConstantExpr *, 4> Visited;
+ while (!Worklist.empty()) {
+ auto *CExpr = Worklist.pop_back_val();
+
+ if (!hasUsefulEdges(CExpr))
+ continue;
+
+ ConstexprEdges.clear();
+ argsToEdges(Analysis, CExpr, ConstexprEdges);
+ for (auto &Edge : ConstexprEdges) {
+ if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
+ if (Visited.insert(Nested).second)
+ Worklist.push_back(Nested);
+
+ if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
+ if (Visited.insert(Nested).second)
+ Worklist.push_back(Nested);
+ }
+
+ Results.append(ConstexprEdges.begin(), ConstexprEdges.end());
+ }
+}
+
+static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
+ auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
+ auto &Iter = Pair.first;
+ if (Pair.second) {
+ auto NewNode = Graph.addNode();
+ Iter->second = NewNode;
+ }
+ return Iter->second;
+ };
+
+ // We don't want the edges of most "return" instructions, but we *do* want
+ // to know what can be returned.
+ if (isa<ReturnInst>(&Inst))
+ ReturnedValues.push_back(&Inst);
+
+ if (!hasUsefulEdges(&Inst))
+ return;
+
+ SmallVector<Edge, 8> Edges;
+ argsToEdges(Analysis, &Inst, Edges);
+
+  // In the case of an unused alloca (or similar), edges may be empty. Still
+  // record that the value exists so we can potentially answer NoAlias.
+ if (Edges.empty()) {
+ auto MaybeVal = getTargetValue(&Inst);
+ assert(MaybeVal.hasValue());
+ auto *Target = *MaybeVal;
+ findOrInsertNode(Target);
+ return;
+ }
+
+ const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) {
+ auto To = findOrInsertNode(E.To);
+ auto From = findOrInsertNode(E.From);
+ auto FlippedWeight = flipWeight(E.Weight);
+ auto Attrs = E.AdditionalAttrs;
+ Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+ std::make_pair(FlippedWeight, Attrs));
+ };
+
+ SmallVector<ConstantExpr *, 4> ConstantExprs;
+ for (const Edge &E : Edges) {
+ addEdgeToGraph(E);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To))
+ ConstantExprs.push_back(Constexpr);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From))
+ ConstantExprs.push_back(Constexpr);
+ }
+
+ for (ConstantExpr *CE : ConstantExprs) {
+ Edges.clear();
+ constexprToEdges(Analysis, *CE, Edges);
+ std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph);
+ }
+}
+
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. I'd like to add interprocedural
+// analysis prior to this, however, in case that somehow requires the graph
+// produced here for efficient execution.
+static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ for (auto &Bb : Fn->getBasicBlockList())
+ for (auto &Inst : Bb.getInstList())
+ addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph);
+}
+
+static bool canSkipAddingToSets(Value *Val) {
+ // Constants can share instances, which may falsely unify multiple
+ // sets, e.g. in
+ // store i32* null, i32** %ptr1
+ // store i32* null, i32** %ptr2
+ // clearly ptr1 and ptr2 should not be unified into the same set, so
+ // we should filter out the (potentially shared) instance of
+ // i32* null.
+ if (isa<Constant>(Val)) {
+ bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) ||
+ isa<ConstantStruct>(Val);
+ // TODO: Because all of these things are constant, we can determine whether
+ // the data is *actually* mutable at graph building time. This will probably
+ // come for free/cheap with offset awareness.
+ bool CanStoreMutableData =
+ isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container;
+ return !CanStoreMutableData;
+ }
+
+ return false;
+}
+
+// Builds the graph + StratifiedSets for a function.
+CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) {
+ NodeMapT Map;
+ GraphT Graph;
+ SmallVector<Value *, 4> ReturnedValues;
+
+ buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph);
+
+ DenseMap<GraphT::Node, Value *> NodeValueMap;
+ NodeValueMap.resize(Map.size());
+ for (const auto &Pair : Map)
+ NodeValueMap.insert(std::make_pair(Pair.second, Pair.first));
+
+ const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) {
+ auto ValIter = NodeValueMap.find(Node);
+ assert(ValIter != NodeValueMap.end());
+ return ValIter->second;
+ };
+
+ StratifiedSetsBuilder<Value *> Builder;
+
+ SmallVector<GraphT::Node, 16> Worklist;
+ for (auto &Pair : Map) {
+ Worklist.clear();
+
+ auto *Value = Pair.first;
+ Builder.add(Value);
+ auto InitialNode = Pair.second;
+ Worklist.push_back(InitialNode);
+ while (!Worklist.empty()) {
+ auto Node = Worklist.pop_back_val();
+ auto *CurValue = findValueOrDie(Node);
+ if (canSkipAddingToSets(CurValue))
+ continue;
+
+ for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
+ auto Weight = std::get<0>(EdgeTuple);
+ auto Label = Weight.first;
+ auto &OtherNode = std::get<1>(EdgeTuple);
+ auto *OtherValue = findValueOrDie(OtherNode);
+
+ if (canSkipAddingToSets(OtherValue))
+ continue;
+
+ bool Added;
+ switch (directionOfEdgeType(Label)) {
+ case Level::Above:
+ Added = Builder.addAbove(CurValue, OtherValue);
+ break;
+ case Level::Below:
+ Added = Builder.addBelow(CurValue, OtherValue);
+ break;
+ case Level::Same:
+ Added = Builder.addWith(CurValue, OtherValue);
+ break;
+ }
+
+ auto Aliasing = Weight.second;
+ if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+ Aliasing.set(*MaybeCurIndex);
+ if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+ Aliasing.set(*MaybeOtherIndex);
+ Builder.noteAttributes(CurValue, Aliasing);
+ Builder.noteAttributes(OtherValue, Aliasing);
+
+ if (Added)
+ Worklist.push_back(OtherNode);
+ }
+ }
+ }
+
+ // There are times when we end up with parameters not in our graph (e.g. if a
+ // parameter is only used as a branch condition). Other bits of code depend on
+ // things that were present during construction being present in the graph.
+ // So, we add all present arguments here.
+ for (auto &Arg : Fn->args()) {
+ if (!Builder.add(&Arg))
+ continue;
+
+ auto Attrs = valueToAttrIndex(&Arg);
+ if (Attrs.hasValue())
+ Builder.noteAttributes(&Arg, *Attrs);
+ }
+
+ return FunctionInfo(Builder.build(), std::move(ReturnedValues));
+}
+
+void CFLAAResult::scan(Function *Fn) {
+ auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
+ (void)InsertPair;
+ assert(InsertPair.second &&
+ "Trying to scan a function that has already been cached");
+
+ FunctionInfo Info(buildSetsFrom(Fn));
+ Cache[Fn] = std::move(Info);
+ Handles.push_front(FunctionHandle(Fn, this));
+}
+
+void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); }
+
+/// \brief Ensures that the given function is available in the cache.
+/// Returns the appropriate entry from the cache.
+const Optional<CFLAAResult::FunctionInfo> &
+CFLAAResult::ensureCached(Function *Fn) {
+ auto Iter = Cache.find(Fn);
+ if (Iter == Cache.end()) {
+ scan(Fn);
+ Iter = Cache.find(Fn);
+ assert(Iter != Cache.end());
+ assert(Iter->second.hasValue());
+ }
+ return Iter->second;
+}
+
+AliasResult CFLAAResult::query(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ auto *ValA = const_cast<Value *>(LocA.Ptr);
+ auto *ValB = const_cast<Value *>(LocB.Ptr);
+
+ Function *Fn = nullptr;
+ auto MaybeFnA = parentFunctionOfValue(ValA);
+ auto MaybeFnB = parentFunctionOfValue(ValB);
+ if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
+ // The only times this is known to happen are when globals + InlineAsm
+ // are involved.
+ DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
+ return MayAlias;
+ }
+
+ if (MaybeFnA.hasValue()) {
+ Fn = *MaybeFnA;
+ assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
+ "Interprocedural queries not supported");
+ } else {
+ Fn = *MaybeFnB;
+ }
+
+ assert(Fn != nullptr);
+ auto &MaybeInfo = ensureCached(Fn);
+ assert(MaybeInfo.hasValue());
+
+ auto &Sets = MaybeInfo->Sets;
+ auto MaybeA = Sets.find(ValA);
+ if (!MaybeA.hasValue())
+ return MayAlias;
+
+ auto MaybeB = Sets.find(ValB);
+ if (!MaybeB.hasValue())
+ return MayAlias;
+
+ auto SetA = *MaybeA;
+ auto SetB = *MaybeB;
+ auto AttrsA = Sets.getLink(SetA.Index).Attrs;
+ auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
+ // Stratified set attributes are used as markers to signify whether a member
+ // of a StratifiedSet (or a member of a set above the current set) has
+ // interacted with either arguments or globals. "Interacted with" meaning
+ // its value may be different depending on the value of an argument or
+ // global. The thought behind this is that, because arguments and globals
+ // may alias each other, if AttrsA and AttrsB have touched args/globals,
+ // we must conservatively say that they alias. However, if at least one of
+ // the sets has no values that could legally be altered by changing the value
+ // of an argument or global, then we don't have to be as conservative.
+ if (AttrsA.any() && AttrsB.any())
+ return MayAlias;
+
+ // We currently unify things even if the accesses to them may not be in
+ // bounds, so we can't return PartialAlias here: we don't know whether the
+ // pointer is really within the object or not. For example, given an
+ // out-of-bounds GEP and an alloca'd pointer, we may unify the two, and we
+ // do not currently track enough information to differentiate them.
+
+ if (SetA.Index == SetB.Index)
+ return MayAlias;
+
+ return NoAlias;
+}
+
+CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return CFLAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char CFLAA::PassID;
+
+char CFLAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis",
+ false, true)
+
+ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); }
+
+CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) {
+ initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool CFLAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(
+ new CFLAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool CFLAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
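+
+// As a rough usage sketch (not itself part of the analysis): with an opt
+// binary built from this tree, the legacy pass registered above can be
+// exercised via
+//   opt -cfl-aa -aa-eval -disable-output input.ll
+// where -aa-eval is the alias-analysis evaluator; exact driver flags may vary
+// between releases.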
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
new file mode 100644
index 0000000..4a03002
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -0,0 +1,72 @@
+//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+char CGSCCAnalysisManagerModuleProxy::PassID;
+
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module &M) {
+ assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!");
+ return Result(*CGAM);
+}
+
+CGSCCAnalysisManagerModuleProxy::Result::~Result() {
+ // Clear out the analysis manager if we're being destroyed -- it means we
+ // didn't even see an invalidate call when we got invalidated.
+ CGAM->clear();
+}
+
+bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA) {
+ // If this proxy isn't marked as preserved, then we can't even invalidate
+ // individual CGSCC analyses; there may be an invalid set of SCC objects in
+ // the cache, making it impossible to incrementally preserve them.
+ // Just clear the entire manager.
+ if (!PA.preserved(ID()))
+ CGAM->clear();
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+char ModuleAnalysisManagerCGSCCProxy::PassID;
+
+char FunctionAnalysisManagerCGSCCProxy::PassID;
+
+FunctionAnalysisManagerCGSCCProxy::Result
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C) {
+ assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!");
+ return Result(*FAM);
+}
+
+FunctionAnalysisManagerCGSCCProxy::Result::~Result() {
+ // Clear out the analysis manager if we're being destroyed -- it means we
+ // didn't even see an invalidate call when we got invalidated.
+ FAM->clear();
+}
+
+bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
+ LazyCallGraph::SCC &C, const PreservedAnalyses &PA) {
+ // If this proxy isn't marked as preserved, then we can't even invalidate
+ // individual function analyses; there may be an invalid set of Function
+ // objects in the cache, making it impossible to incrementally preserve them.
+ // Just clear the entire manager.
+ if (!PA.preserved(ID()))
+ FAM->clear();
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+char CGSCCAnalysisManagerFunctionProxy::PassID;
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
new file mode 100644
index 0000000..7cec962
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -0,0 +1,306 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementations of the CallGraph class methods.
+//
+
+CallGraph::CallGraph(Module &M)
+ : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
+ CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
+ // Add every function to the call graph.
+ for (Function &F : M)
+ addToCallGraph(&F);
+
+ // If we didn't find a main function, use the external call graph node
+ if (!Root)
+ Root = ExternalCallingNode;
+}
+
+CallGraph::CallGraph(CallGraph &&Arg)
+ : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root),
+ ExternalCallingNode(Arg.ExternalCallingNode),
+ CallsExternalNode(std::move(Arg.CallsExternalNode)) {
+ Arg.FunctionMap.clear();
+ Arg.Root = nullptr;
+ Arg.ExternalCallingNode = nullptr;
+}
+
+CallGraph::~CallGraph() {
+ // CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode)
+ CallsExternalNode->allReferencesDropped();
+
+// Reset all node's use counts to zero before deleting them to prevent an
+// assertion from firing.
+#ifndef NDEBUG
+ for (auto &I : FunctionMap)
+ I.second->allReferencesDropped();
+#endif
+}
+
+void CallGraph::addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
+
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
+ }
+ }
+
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode.get());
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
+ // Indirect calls of intrinsics are not allowed, so no need to check.
+ // We can be more precise here by using TargetArg returned by
+ // Intrinsic::isLeaf.
+ Node->addCalledFunction(CS, CallsExternalNode.get());
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ }
+ }
+}
+
+void CallGraph::print(raw_ostream &OS) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = Root->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << Root << ">>\n";
+ }
+
+ // Print in a deterministic order by sorting CallGraphNodes by name. We do
+ // this here to avoid slowing down the non-printing fast path.
+
+ SmallVector<CallGraphNode *, 16> Nodes;
+ Nodes.reserve(FunctionMap.size());
+
+ for (auto I = begin(), E = end(); I != E; ++I)
+ Nodes.push_back(I->second.get());
+
+ std::sort(Nodes.begin(), Nodes.end(),
+ [](CallGraphNode *LHS, CallGraphNode *RHS) {
+ if (Function *LF = LHS->getFunction())
+ if (Function *RF = RHS->getFunction())
+ return LF->getName() < RF->getName();
+
+ return RHS->getFunction() != nullptr;
+ });
+
+ for (CallGraphNode *CN : Nodes)
+ CN->print(OS);
+}
+
+LLVM_DUMP_METHOD
+void CallGraph::dump() const { print(dbgs()); }
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+ assert(CGN->empty() && "Cannot remove function from call "
+ "graph if it references other functions!");
+ Function *F = CGN->getFunction(); // Get the function for the call graph node
+ FunctionMap.erase(F); // Remove the call graph node from the map
+
+ M.getFunctionList().remove(F);
+ return F;
+}
+
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+ assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+ assert(!FunctionMap.count(To) &&
+ "Pointing CallGraphNode at a function that already exists");
+ FunctionMapTy::iterator I = FunctionMap.find(From);
+ I->second->F = const_cast<Function*>(To);
+ FunctionMap[To] = std::move(I->second);
+ FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+ auto &CGN = FunctionMap[F];
+ if (CGN)
+ return CGN.get();
+
+ assert((!F || F->getParent() == &M) && "Function not in current module!");
+ CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F));
+ return CGN.get();
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of the CallGraphNode class methods.
+//
+
+void CallGraphNode::print(raw_ostream &OS) const {
+ if (Function *F = getFunction())
+ OS << "Call graph node for function: '" << F->getName() << "'";
+ else
+ OS << "Call graph node <<null function>>";
+
+ OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " CS<" << I->first << "> calls ";
+ if (Function *FI = I->second->getFunction())
+ OS << "function '" << FI->getName() <<"'\n";
+ else
+ OS << "external node\n";
+ }
+ OS << '\n';
+}
+
+LLVM_DUMP_METHOD
+void CallGraphNode::dump() const { print(dbgs()); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+ for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+ if (CalledFunctions[i].second == Callee) {
+ Callee->DropRef();
+ CalledFunctions[i] = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ --i; --e;
+ }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+ CallRecord &CR = *I;
+ if (CR.second == Callee && CR.first == nullptr) {
+ Callee->DropRef();
+ *I = CalledFunctions.back();
+ CalledFunctions.pop_back();
+ return;
+ }
+ }
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+ CallSite NewCS, CallGraphNode *NewNode){
+ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+ assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+ if (I->first == CS.getInstruction()) {
+ I->second->DropRef();
+ I->first = NewCS.getInstruction();
+ I->second = NewNode;
+ NewNode->AddRef();
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Out-of-line definitions of CallGraphAnalysis class members.
+//
+
+char CallGraphAnalysis::PassID;
+
+//===----------------------------------------------------------------------===//
+// Implementations of the CallGraphWrapperPass class methods.
+//
+
+CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) {
+ initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+CallGraphWrapperPass::~CallGraphWrapperPass() {}
+
+void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+bool CallGraphWrapperPass::runOnModule(Module &M) {
+ // All the real work is done in the constructor for the CallGraph.
+ G.reset(new CallGraph(M));
+ return false;
+}
+
+INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction",
+ false, true)
+
+char CallGraphWrapperPass::ID = 0;
+
+void CallGraphWrapperPass::releaseMemory() { G.reset(); }
+
+void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
+ if (!G) {
+ OS << "No call graph has been built!\n";
+ return;
+ }
+
+ // Just delegate.
+ G->print(OS);
+}
+
+LLVM_DUMP_METHOD
+void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
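+
+// A minimal caller-side sketch, assuming a Module M is already available and
+// that "foo" names a function defined in it (both are illustrative here):
+//   CallGraph CG(M);                  // builds edges for every function in M
+//   CallGraphNode *N = CG[M.getFunction("foo")];
+//   N->print(errs());                 // this node's call edges
+//   CG.print(errs());                 // or the whole graph, sorted by name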
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
new file mode 100644
index 0000000..07b389a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -0,0 +1,632 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call graph in SCC order: that is, they process functions bottom-up, except
+// recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "cgscc-passmgr"
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+ static char ID;
+ explicit CGPassManager()
+ : ModulePass(ID), PMDataManager() { }
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M) override;
+
+ using ModulePass::doInitialization;
+ using ModulePass::doFinalization;
+
+ bool doInitialization(CallGraph &CG);
+ bool doFinalization(CallGraph &CG);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
+ // CGPassManager walks SCC and it needs CallGraph.
+ Info.addRequired<CallGraphWrapperPass>();
+ Info.setPreservesAll();
+ }
+
+ const char *getPassName() const override {
+ return "CallGraph Pass Manager";
+ }
+
+ PMDataManager *getAsPMDataManager() override { return this; }
+ Pass *getAsPass() override { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) override {
+ errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+ }
+
+ Pass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<Pass *>(PassVector[N]);
+ }
+
+ PassManagerType getPassManagerType() const override {
+ return PMT_CallGraphPassManager;
+ }
+
+private:
+ bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall);
+
+ bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall);
+ bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool IsCheckingMode);
+};
+
+} // end anonymous namespace.
+
+char CGPassManager::ID = 0;
+
+
+bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+ CallGraph &CG, bool &CallGraphUpToDate,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+ PMDataManager *PM = P->getAsPMDataManager();
+
+ if (!PM) {
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ if (!CallGraphUpToDate) {
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ CallGraphUpToDate = true;
+ }
+
+ {
+ TimeRegion PassTimer(getPassTimer(CGSP));
+ Changed = CGSP->runOnSCC(CurSCC);
+ }
+
+ // After the CGSCCPass is done, when assertions are enabled, use
+ // RefreshCallGraph to verify that the callgraph was correctly updated.
+#ifndef NDEBUG
+ if (Changed)
+ RefreshCallGraph(CurSCC, CG, true);
+#endif
+
+ return Changed;
+ }
+
+
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ FPPassManager *FPP = (FPPassManager*)P;
+
+ // Run pass P on all functions in the current SCC.
+ for (CallGraphNode *CGN : CurSCC) {
+ if (Function *F = CGN->getFunction()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
+ {
+ TimeRegion PassTimer(getPassTimer(FPP));
+ Changed |= FPP->runOnFunction(*F);
+ }
+ F->getContext().yield();
+ }
+ }
+
+ // The function pass(es) modified the IR, they may have clobbered the
+ // callgraph.
+ if (Changed && CallGraphUpToDate) {
+ DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: "
+ << P->getPassName() << '\n');
+ CallGraphUpToDate = false;
+ }
+ return Changed;
+}
+
+
+/// Scan the functions in the specified SCC and resync the
+/// callgraph with the call sites found in it. This is used after
+/// FunctionPasses have potentially munged the callgraph, and can be used after
+/// CallGraphSCC passes to verify that they correctly updated the callgraph.
+///
+/// This function returns true if it devirtualized an existing function call,
+/// meaning it turned an indirect call into a direct call. This happens when
+/// a function pass like GVN optimizes away stuff feeding the indirect call.
+/// This never happens in checking mode.
+///
+bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
+ CallGraph &CG, bool CheckingMode) {
+ DenseMap<Value*, CallGraphNode*> CallSites;
+
+ DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
+ << " nodes:\n";
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
+ );
+
+ bool MadeChange = false;
+ bool DevirtualizedCall = false;
+
+ // Scan all functions in the SCC.
+ unsigned FunctionNo = 0;
+ for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end();
+ SCCIdx != E; ++SCCIdx, ++FunctionNo) {
+ CallGraphNode *CGN = *SCCIdx;
+ Function *F = CGN->getFunction();
+ if (!F || F->isDeclaration()) continue;
+
+ // Walk the function body looking for call sites. Sync up the call sites in
+ // CGN with those actually in the function.
+
+ // Keep track of the number of direct and indirect calls that were
+ // invalidated and removed.
+ unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
+
+ // Get the set of call sites currently in the function.
+ for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
+ // If this call site is null, then the function pass deleted the call
+ // entirely and the WeakVH nulled it out.
+ if (!I->first ||
+ // If we've already seen this call site, then the FunctionPass RAUW'd
+ // one call with another, which resulted in two "uses" in the edge
+ // list of the same call.
+ CallSites.count(I->first) ||
+
+ // If the call edge is not from a call or invoke, or it is an
+ // intrinsic call, then the function pass RAUW'd a call with
+ // another value. This can happen, for example, when constant
+ // folding of well-known functions occurs.
+ !CallSite(I->first) ||
+ (CallSite(I->first).getCalledFunction() &&
+ CallSite(I->first).getCalledFunction()->isIntrinsic() &&
+ Intrinsic::isLeaf(
+ CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If this was an indirect call site, count it.
+ if (!I->second->getFunction())
+ ++NumIndirectRemoved;
+ else
+ ++NumDirectRemoved;
+
+ // Just remove the edge from the set of callees, keeping track of whether
+ // I points to the last element of the vector.
+ bool WasLast = I + 1 == E;
+ CGN->removeCallEdge(I);
+
+ // If I pointed to the last element of the vector, we have to bail out:
+ // iterator checking rejects comparisons of the resultant pointer with
+ // end.
+ if (WasLast)
+ break;
+ E = CGN->end();
+ continue;
+ }
+
+ assert(!CallSites.count(I->first) &&
+ "Call site occurs in node multiple times");
+
+ CallSite CS(I->first);
+ if (CS) {
+ Function *Callee = CS.getCalledFunction();
+ // Ignore intrinsics because they're not really function calls.
+ if (!Callee || !(Callee->isIntrinsic()))
+ CallSites.insert(std::make_pair(I->first, I->second));
+ }
+ ++I;
+ }
+
+ // Loop over all of the instructions in the function, getting the callsites.
+ // Keep track of the number of direct/indirect calls added.
+ unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ CallSite CS(cast<Value>(I));
+ if (!CS) continue;
+ Function *Callee = CS.getCalledFunction();
+ if (Callee && Callee->isIntrinsic()) continue;
+
+ // If this call site already existed in the callgraph, just verify it
+ // matches up to expectations and remove it from CallSites.
+ DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
+ CallSites.find(CS.getInstruction());
+ if (ExistingIt != CallSites.end()) {
+ CallGraphNode *ExistingNode = ExistingIt->second;
+
+ // Remove from CallSites since we have now seen it.
+ CallSites.erase(ExistingIt);
+
+ // Verify that the callee is right.
+ if (ExistingNode->getFunction() == CS.getCalledFunction())
+ continue;
+
+ // If we are in checking mode, we are not allowed to actually mutate
+ // the callgraph. If this is a case where we can infer that the
+ // callgraph is less precise than it could be (e.g. an indirect call
+ // site could be turned direct), don't reject it in checking mode, and
+ // don't tweak it to be more precise.
+ if (CheckingMode && CS.getCalledFunction() &&
+ ExistingNode->getFunction() == nullptr)
+ continue;
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If not, we either went from a direct call to indirect, indirect to
+ // direct, or direct to different direct.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ // Keep track of whether we turned an indirect call into a direct
+ // one.
+ if (!ExistingNode->getFunction()) {
+ DevirtualizedCall = true;
+ DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
+ << Callee->getName() << "'\n");
+ }
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ }
+
+ // Update the edge target in CGN.
+ CGN->replaceCallEdge(CS, CS, CalleeNode);
+ MadeChange = true;
+ continue;
+ }
+
+ assert(!CheckingMode &&
+ "CallGraphSCCPass did not update the CallGraph correctly!");
+
+ // If the call site didn't exist in the CGN yet, add it.
+ CallGraphNode *CalleeNode;
+ if (Function *Callee = CS.getCalledFunction()) {
+ CalleeNode = CG.getOrInsertFunction(Callee);
+ ++NumDirectAdded;
+ } else {
+ CalleeNode = CG.getCallsExternalNode();
+ ++NumIndirectAdded;
+ }
+
+ CGN->addCalledFunction(CS, CalleeNode);
+ MadeChange = true;
+ }
+
+ // We scanned the old callgraph node, removing invalidated call sites and
+ // then added back newly found call sites. One thing that can happen is
+ // that an old indirect call site was deleted and replaced with a new direct
+ // call. In this case, we have devirtualized a call, and CGSCCPM would like
+ // to iteratively optimize the new code. Unfortunately, we don't really
+ // have a great way to detect when this happens. As an approximation, we
+ // just look at whether the number of indirect calls is reduced and the
+ // number of direct calls is increased. There are tons of ways to fool this
+ // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+ // direct call) but this is close enough.
+ if (NumIndirectRemoved > NumIndirectAdded &&
+ NumDirectRemoved < NumDirectAdded)
+ DevirtualizedCall = true;
+
+ // After scanning this function, if we still have entries in CallSites, then
+ // they are dangling pointers. WeakVH should save us from this, so abort if
+ // this happens.
+ assert(CallSites.empty() && "Dangling pointers found in call sites map");
+
+ // Periodically do an explicit clear to remove tombstones when processing
+ // large SCCs.
+ if ((FunctionNo & 15) == 15)
+ CallSites.clear();
+ }
+
+ DEBUG(if (MadeChange) {
+ dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
+ if (DevirtualizedCall)
+ dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
+ } else {
+ dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
+ }
+ );
+ (void)MadeChange;
+
+ return DevirtualizedCall;
+}
+
+/// Execute the body of the entire pass manager on the specified SCC.
+/// This keeps track of whether a function pass devirtualizes
+/// any calls and returns it in DevirtualizedCall.
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool &DevirtualizedCall) {
+ bool Changed = false;
+
+ // Keep track of whether the callgraph is known to be up-to-date or not.
+ // The CGSSC pass manager runs two types of passes:
+ // CallGraphSCC Passes and other random function passes. Because other
+ // random function passes are not CallGraph aware, they may clobber the
+ // call graph by introducing new calls or deleting other ones. This flag
+ // is set to false when we run a function pass so that we know to clean up
+ // the callgraph when we need to run a CGSCCPass again.
+ bool CallGraphUpToDate = true;
+
+ // Run all passes on current SCC.
+ for (unsigned PassNo = 0, e = getNumContainedPasses();
+ PassNo != e; ++PassNo) {
+ Pass *P = getContainedPass(PassNo);
+
+ // If we're in -debug-pass=Executions mode, construct the SCC node list,
+ // otherwise avoid constructing this string as it is expensive.
+ if (isPassDebuggingExecutionsOrMore()) {
+ std::string Functions;
+ #ifndef NDEBUG
+ raw_string_ostream OS(Functions);
+ for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+ I != E; ++I) {
+ if (I != CurSCC.begin()) OS << ", ";
+ (*I)->print(OS);
+ }
+ OS.flush();
+ #endif
+ dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+ }
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ // Actually run this pass on the current SCC.
+ Changed |= RunPassOnSCC(P, CurSCC, CG,
+ CallGraphUpToDate, DevirtualizedCall);
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+ dumpPreservedSet(P);
+
+ verifyPreservedAnalysis(P);
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, "", ON_CG_MSG);
+ }
+
+ // If the callgraph was left out of date (because the last pass run was a
+ // function pass), refresh it before we move on to the next SCC.
+ if (!CallGraphUpToDate)
+ DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+ return Changed;
+}
+
+/// Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ bool Changed = doInitialization(CG);
+
+ // Walk the callgraph in bottom-up SCC order.
+ scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+ CallGraphSCC CurSCC(&CGI);
+ while (!CGI.isAtEnd()) {
+ // Copy the current SCC and increment past it so that the pass can hack
+ // on the SCC if it wants to without invalidating our iterator.
+ const std::vector<CallGraphNode *> &NodeVec = *CGI;
+ CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
+ ++CGI;
+
+ // At the top level, we run all the passes in this pass manager on the
+ // functions in this SCC. However, we support iterative compilation in the
+ // case where a function pass devirtualizes a call to a function. For
+ // example, it is very common for a function pass (often GVN or instcombine)
+ // to eliminate the addressing that feeds into a call. With that improved
+ // information, we would like the call to be an inline candidate, infer
+ // mod-ref information etc.
+ //
+ // Because of this, we allow iteration up to a specified iteration count.
+ // This only happens in the case of a devirtualized call, so we only burn
+ // compile time in the case that we're making progress. We also have a hard
+ // iteration count limit in case there is crazy code.
+ unsigned Iteration = 0;
+ bool DevirtualizedCall = false;
+ do {
+ DEBUG(if (Iteration)
+ dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #"
+ << Iteration << '\n');
+ DevirtualizedCall = false;
+ Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
+ } while (Iteration++ < MaxIterations && DevirtualizedCall);
+
+ if (DevirtualizedCall)
+ DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
+ << " times, due to -max-cg-scc-iterations\n");
+
+ if (Iteration > MaxSCCIterations)
+ MaxSCCIterations = Iteration;
+
+ }
+ Changed |= doFinalization(CG);
+ return Changed;
+}
+
+
+/// Initialize CG
+bool CGPassManager::doInitialization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG);
+ }
+ }
+ return Changed;
+}
+
+/// Finalize CG
+bool CGPassManager::doFinalization(CallGraph &CG) {
+ bool Changed = false;
+ for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) {
+ if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) {
+ assert(PM->getPassManagerType() == PMT_FunctionPassManager &&
+ "Invalid CGPassManager member");
+ Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule());
+ } else {
+ Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG);
+ }
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCC Implementation
+//===----------------------------------------------------------------------===//
+
+/// This informs the SCC and the pass manager that the specified
+/// Old node has been deleted, and New is to be used in its place.
+void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
+ assert(Old != New && "Should not replace node with self");
+ for (unsigned i = 0; ; ++i) {
+ assert(i != Nodes.size() && "Node not in SCC");
+ if (Nodes[i] != Old) continue;
+ Nodes[i] = New;
+ break;
+ }
+
+ // Update the active scc_iterator so that it doesn't contain dangling
+ // pointers to the old CallGraphNode.
+ scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context;
+ CGI->ReplaceNode(Old, New);
+}
+
+
+//===----------------------------------------------------------------------===//
+// CallGraphSCCPass Implementation
+//===----------------------------------------------------------------------===//
+
+/// Assign pass manager to manage this pass.
+void CallGraphSCCPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find CGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_CallGraphPassManager)
+ PMS.pop();
+
+ assert(!PMS.empty() && "Unable to handle Call Graph Pass");
+ CGPassManager *CGP;
+
+ if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager)
+ CGP = (CGPassManager*)PMS.top();
+ else {
+ // Create new Call Graph SCC Pass Manager if it does not exist.
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Call Graph Pass Manager
+ CGP = new CGPassManager();
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(CGP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = CGP;
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(CGP);
+ }
+
+ CGP->add(this);
+}
+
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
+/// always explicitly call the implementation here.
+void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraphWrapperPass>();
+ AU.addPreserved<CallGraphWrapperPass>();
+}
+
+
+//===----------------------------------------------------------------------===//
+// PrintCallGraphPass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// PrintCallGraphPass - Print a Module corresponding to a call graph.
+ ///
+ class PrintCallGraphPass : public CallGraphSCCPass {
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+ public:
+ static char ID;
+ PrintCallGraphPass(const std::string &B, raw_ostream &o)
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) override {
+ Out << Banner;
+ for (CallGraphNode *CGN : SCC) {
+ if (CGN->getFunction())
+ CGN->getFunction()->print(Out);
+ else
+ Out << "\nPrinting <null> Function\n";
+ }
+ return false;
+ }
+ };
+
+} // end anonymous namespace.
+
+char PrintCallGraphPass::ID = 0;
+
+Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintCallGraphPass(Banner, O);
+}
+
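+// As a sketch of the client side of this manager (the names below are
+// illustrative, not defined in this file), a CallGraphSCCPass follows the
+// same shape as the printer pass above:
+//   struct MySCCPass : public CallGraphSCCPass {
+//     static char ID;
+//     MySCCPass() : CallGraphSCCPass(ID) {}
+//     bool runOnSCC(CallGraphSCC &SCC) override {
+//       for (CallGraphNode *CGN : SCC)
+//         if (Function *F = CGN->getFunction())
+//           (void)F; // inspect or transform F here
+//       return false; // return true only if the SCC was modified
+//     }
+//   };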
diff --git a/contrib/llvm/lib/Analysis/CallPrinter.cpp b/contrib/llvm/lib/Analysis/CallPrinter.cpp
new file mode 100644
index 0000000..68dcd3c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CallPrinter.cpp
@@ -0,0 +1,92 @@
+//===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-callgraph', which emits a callgraph.<fnname>.dot
+// containing the call graph of a module.
+//
+// There is also a pass available to directly call dotty ('-view-callgraph').
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(CallGraph *Graph) { return "Call graph"; }
+
+ std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
+ if (Function *Func = Node->getFunction())
+ return Func->getName();
+
+ return "external node";
+ }
+};
+
+struct AnalysisCallGraphWrapperPassTraits {
+ static CallGraph *getGraph(CallGraphWrapperPass *P) {
+ return &P->getCallGraph();
+ }
+};
+
+} // end llvm namespace
+
+namespace {
+
+struct CallGraphViewer
+ : public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
+ AnalysisCallGraphWrapperPassTraits> {
+ static char ID;
+
+ CallGraphViewer()
+ : DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
+ AnalysisCallGraphWrapperPassTraits>(
+ "callgraph", ID) {
+ initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct CallGraphPrinter : public DOTGraphTraitsModulePrinter<
+ CallGraphWrapperPass, true, CallGraph *,
+ AnalysisCallGraphWrapperPassTraits> {
+ static char ID;
+
+ CallGraphPrinter()
+ : DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *,
+ AnalysisCallGraphWrapperPassTraits>(
+ "callgraph", ID) {
+ initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+} // end anonymous namespace
+
+char CallGraphViewer::ID = 0;
+INITIALIZE_PASS(CallGraphViewer, "view-callgraph", "View call graph", false,
+ false)
+
+char CallGraphPrinter::ID = 0;
+INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
+ "Print call graph to 'dot' file", false, false)
+
+// Create methods available outside of this file so that they can be
+// referenced from "include/llvm/LinkAllPasses.h"; otherwise the passes would
+// be stripped out by link-time optimization.
+
+ModulePass *llvm::createCallGraphViewerPass() { return new CallGraphViewer(); }
+
+ModulePass *llvm::createCallGraphPrinterPass() {
+ return new CallGraphPrinter();
+}
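+
+// A rough usage sketch: `opt -dot-callgraph input.ll -disable-output` writes
+// the module's call graph to a .dot file that can then be rendered with
+// Graphviz, e.g. `dot -Tpng callgraph.dot -o callgraph.png`; the exact output
+// file name and flag spellings may vary between releases.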
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 0000000..1add2fa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,315 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+CaptureTracker::~CaptureTracker() {}
+
+bool CaptureTracker::shouldExplore(const Use *U) { return true; }
+
+namespace {
+ struct SimpleCaptureTracker : public CaptureTracker {
+ explicit SimpleCaptureTracker(bool ReturnCaptures)
+ : ReturnCaptures(ReturnCaptures), Captured(false) {}
+
+ void tooManyUses() override { Captured = true; }
+
+ bool captured(const Use *U) override {
+ if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
+ return false;
+
+ Captured = true;
+ return true;
+ }
+
+ bool ReturnCaptures;
+
+ bool Captured;
+ };
+
+ /// Only find pointer captures which happen before the given instruction. Uses
+ /// the dominator tree to determine whether one instruction is before another.
+ /// Only supports the case where the Value is defined in the same basic block
+ /// as the given instruction and the use.
+ struct CapturesBefore : public CaptureTracker {
+
+ CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT,
+ bool IncludeI, OrderedBasicBlock *IC)
+ : OrderedBB(IC), BeforeHere(I), DT(DT),
+ ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
+
+ void tooManyUses() override { Captured = true; }
+
+ bool isSafeToPrune(Instruction *I) {
+ BasicBlock *BB = I->getParent();
+ // We explore this usage only if the usage can reach "BeforeHere".
+ // If the use is not reachable from entry, there is no need to explore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return true;
+
+ // Compute the case where both instructions are inside the same basic
+ // block. Since instructions in the same BB as BeforeHere are numbered in
+ // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable'
+ // which are very expensive for large basic blocks.
+ if (BB == BeforeHere->getParent()) {
+ // 'I' dominates 'BeforeHere' => not safe to prune.
+ //
+ // The value defined by an invoke dominates an instruction only
+ // if it dominates every instruction in UseBB. A PHI is dominated only
+ // if the instruction dominates every possible use in the UseBB. Since
+ // UseBB == BB, avoid pruning.
+ if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere)
+ return false;
+ if (!OrderedBB->dominates(BeforeHere, I))
+ return false;
+
+ // 'BeforeHere' comes before 'I', so it's safe to prune if we also
+ // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or
+ // via its successors, i.e., prune if:
+ //
+ // (1) BB is an entry block or has no successors.
+ // (2) There's no path coming back through BB's successors.
+ if (BB == &BB->getParent()->getEntryBlock() ||
+ !BB->getTerminator()->getNumSuccessors())
+ return true;
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.append(succ_begin(BB), succ_end(BB));
+ return !isPotentiallyReachableFromMany(Worklist, BB, DT);
+ }
+
+ // If the value is defined in the same basic block as the use and BeforeHere,
+ // there is no need to explore the use if BeforeHere dominates the use.
+ // Check whether there is a path from I to BeforeHere.
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !isPotentiallyReachable(I, BeforeHere, DT))
+ return true;
+
+ return false;
+ }
+
+ bool shouldExplore(const Use *U) override {
+ Instruction *I = cast<Instruction>(U->getUser());
+
+ if (BeforeHere == I && !IncludeI)
+ return false;
+
+ if (isSafeToPrune(I))
+ return false;
+
+ return true;
+ }
+
+ bool captured(const Use *U) override {
+ if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
+ return false;
+
+ if (!shouldExplore(U))
+ return false;
+
+ Captured = true;
+ return true;
+ }
+
+ OrderedBasicBlock *OrderedBB;
+ const Instruction *BeforeHere;
+ DominatorTree *DT;
+
+ bool ReturnCaptures;
+ bool IncludeI;
+
+ bool Captured;
+ };
+}
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specifies whether
+/// storing the value (or part of it) into memory anywhere automatically
+/// counts as capturing it or not.
+bool llvm::PointerMayBeCaptured(const Value *V,
+ bool ReturnCaptures, bool StoreCaptures) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ (void)StoreCaptures;
+
+ SimpleCaptureTracker SCT(ReturnCaptures);
+ PointerMayBeCaptured(V, &SCT);
+ return SCT.Captured;
+}
+
+/// PointerMayBeCapturedBefore - Return true if this pointer value may be
+/// captured by the enclosing function (which is required to exist). If a
+/// DominatorTree is provided, only captures which happen before the given
+/// instruction are considered. This routine can be expensive, so consider
+/// caching the results. The boolean ReturnCaptures specifies whether
+/// returning the value (or part of it) from the function counts as capturing
+/// it or not. The boolean StoreCaptures specifies whether storing the value
+/// (or part of it) into memory anywhere automatically counts as capturing it
+/// or not. An ordered basic block \p OBB can be used in order to speed up
+/// queries about relative order among instructions in the same basic block.
+bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures, const Instruction *I,
+ DominatorTree *DT, bool IncludeI,
+ OrderedBasicBlock *OBB) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+ bool UseNewOBB = OBB == nullptr;
+
+ if (!DT)
+ return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+ if (UseNewOBB)
+ OBB = new OrderedBasicBlock(I->getParent());
+
+ // TODO: See comment in PointerMayBeCaptured regarding what could be done
+ // with StoreCaptures.
+
+ CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);
+ PointerMayBeCaptured(V, &CB);
+
+ if (UseNewOBB)
+ delete OBB;
+ return CB.Captured;
+}
+
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
+ SmallVector<const Use *, Threshold> Worklist;
+ SmallSet<const Use *, Threshold> Visited;
+ int Count = 0;
+
+ for (const Use &U : V->uses()) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+
+ if (!Tracker->shouldExplore(&U)) continue;
+ Visited.insert(&U);
+ Worklist.push_back(&U);
+ }
+
+ while (!Worklist.empty()) {
+ const Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ V = U->get();
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ break;
+
+ // Not captured if only passed via 'nocapture' arguments. Note that
+ // calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ CallSite::data_operand_iterator B =
+ CS.data_operands_begin(), E = CS.data_operands_end();
+ for (CallSite::data_operand_iterator A = B; A != E; ++A)
+ if (A->get() == V && !CS.doesNotCapture(A - B))
+ // The parameter is not marked 'nocapture' - captured.
+ if (Tracker->captured(U))
+ return;
+ break;
+ }
+ case Instruction::Load:
+ // Loading from a pointer does not cause it to be captured.
+ break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ break;
+ case Instruction::Store:
+ if (V == I->getOperand(0))
+ // Stored the pointer - conservatively assume it may be captured.
+ if (Tracker->captured(U))
+ return;
+ // Storing to the pointee does not cause the pointer to be captured.
+ break;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ // The original value is not captured via this if the new value isn't.
+ Count = 0;
+ for (Use &UU : I->uses()) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+
+ if (Visited.insert(&UU).second)
+ if (Tracker->shouldExplore(&UU))
+ Worklist.push_back(&UU);
+ }
+ break;
+ case Instruction::ICmp:
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
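+      // For instance (an illustrative fragment):
+      //   %m = call i8* @malloc(i64 8)
+      //   %isnull = icmp eq i8* %m, null    ; not treated as a capture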
+ if (ConstantPointerNull *CPN =
+ dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (CPN->getType()->getAddressSpace() == 0)
+ if (isNoAliasCall(V->stripPointerCasts()))
+ break;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ if (Tracker->captured(U))
+ return;
+ break;
+ default:
+ // Something else - be conservative and say it is captured.
+ if (Tracker->captured(U))
+ return;
+ break;
+ }
+ }
+
+ // All uses examined.
+}
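+
+// A minimal sketch of driving this interface with a custom tracker
+// (a hypothetical type; only the two pure-virtual hooks are overridden):
+//   struct CountCaptures : CaptureTracker {
+//     unsigned N = 0;
+//     void tooManyUses() override { N = ~0U; }
+//     bool captured(const Use *U) override { ++N; return false; }
+//   };
+//   CountCaptures CC;
+//   PointerMayBeCaptured(Ptr, &CC);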
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
new file mode 100644
index 0000000..4090b4c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -0,0 +1,184 @@
+//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements code cost measurement utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "code-metrics"
+
+using namespace llvm;
+
+static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallPtrSet<const Value *, 32> Visited;
+
+ // Make sure that all of the items in WorkSet are in our EphValues set.
+ EphValues.insert(WorkSet.begin(), WorkSet.end());
+
+ // Note: We don't speculate PHIs here, so we'll miss instruction chains kept
+ // alive only by ephemeral values.
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.front();
+ WorkSet.erase(WorkSet.begin());
+
+ if (!Visited.insert(V).second)
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ if (!std::all_of(V->user_begin(), V->user_end(),
+ [&](const User *U) { return EphValues.count(U); }))
+ continue;
+
+ EphValues.insert(V);
+ DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
+
+ if (const User *U = dyn_cast<User>(V))
+ for (const Value *J : U->operands()) {
+ if (isSafeToSpeculativelyExecute(J))
+ WorkSet.push_back(J);
+ }
+ }
+}
+
+// Find all ephemeral values.
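+// A value is ephemeral when it is used only (directly or transitively) by
+// @llvm.assume. In the hypothetical fragment below, %cmp exists solely to
+// feed the assumption and should not count toward code-size metrics:
+//   %cmp = icmp ugt i32 %x, 7
+//   call void @llvm.assume(i1 %cmp)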
+void CodeMetrics::collectEphemeralValues(
+ const Loop *L, AssumptionCache *AC,
+ SmallPtrSetImpl<const Value *> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ Instruction *I = cast<Instruction>(AssumeVH);
+
+    // Filter out call sites outside of the loop so we don't do a function's
+ // worth of work for each of its loops (and, in the common case, ephemeral
+ // values in the loop are likely due to @llvm.assume calls in the loop).
+ if (!L->contains(I->getParent()))
+ continue;
+
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
+void CodeMetrics::collectEphemeralValues(
+ const Function *F, AssumptionCache *AC,
+ SmallPtrSetImpl<const Value *> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ Instruction *I = cast<Instruction>(AssumeVH);
+ assert(I->getParent()->getParent() == F &&
+ "Found assumption for the wrong function!");
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+ const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ // Skip ephemeral values.
+ if (EphValues.count(&*II))
+ continue;
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+      // If this call is to the function itself, then the function is
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+
+ if (TTI.isLoweredToCall(F))
+ ++NumCalls;
+ } else {
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB))
+ notDuplicatable = true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(II))
+ if (CI->cannotDuplicate())
+ notDuplicatable = true;
+
+ if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II))
+ if (InvI->cannotDuplicate())
+ notDuplicatable = true;
+
+ NumInsts += TTI.getUserCost(&*II);
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+  // We never want to inline functions that contain an indirectbr. Inlining
+  // one would be incorrect because all the blockaddresses (in static global
+  // initializers, for example) would still refer to the original function,
+  // and the indirect jump would jump from the inlined copy of the function
+  // into the original function, which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
new file mode 100644
index 0000000..ccb5663
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -0,0 +1,1834 @@
+//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines routines for folding instructions into constants.
+//
+// Also, to supplement the basic IR ConstantExpr simplifications,
+// this file defines some additional folding routines that can make use of
+// DataLayout information. These functions cannot go in IR due to library
+// dependency issues.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cerrno>
+#include <cmath>
+
+#ifdef HAVE_FENV_H
+#include <fenv.h>
+#endif
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Folding internal helper functions
+//===----------------------------------------------------------------------===//
+
+/// Constant fold bitcast, symbolically evaluating it with DataLayout.
+/// This always returns a non-null constant, but it may be a
+/// ConstantExpr if unfoldable.
+static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
+ // Catch the obvious splat cases.
+ if (C->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+ !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
+ return Constant::getAllOnesValue(DestTy);
+
+ // Handle a vector->integer cast.
+ if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
+ VectorType *VTy = dyn_cast<VectorType>(C->getType());
+ if (!VTy)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned NumSrcElts = VTy->getNumElements();
+ Type *SrcEltTy = VTy->getElementType();
+
+    // If the vector is a vector of floating-point values, convert it to a
+    // vector of integers to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ }
+
+ ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ if (!CDV)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = DL.getTypeAllocSizeInBits(SrcEltTy);
+ APInt Result(IT->getBitWidth(), 0);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Result <<= BitShift;
+ if (DL.isLittleEndian())
+ Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
+ else
+ Result |= CDV->getElementAsInteger(i);
+ }
+
+ return ConstantInt::get(IT, Result);
+ }
+
+ // The code below only handles casts to vectors currently.
+ VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
+ if (!DestVTy)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
+ // vector so the code below can handle it uniformly.
+ if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
+ Constant *Ops = C; // don't take the address of C!
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
+ }
+
+ // If this is a bitcast from constant vector -> vector, fold it.
+ if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // If the element types match, IR can fold it.
+ unsigned NumDstElt = DestVTy->getNumElements();
+ unsigned NumSrcElt = C->getType()->getVectorNumElements();
+ if (NumDstElt == NumSrcElt)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ Type *SrcEltTy = C->getType()->getVectorElementType();
+ Type *DstEltTy = DestVTy->getElementType();
+
+ // Otherwise, we're changing the number of elements in a vector, which
+ // requires endianness information to do the right thing. For example,
+ // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ // folds to (little endian):
+ // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ // and to (big endian):
+ // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+  // First things first: we only want to deal with integers here, so if we
+  // have something in FP form, recast it as integer.
+ if (DstEltTy->isFloatingPointTy()) {
+    // Fold to a vector of integers with the same size as our FP type.
+ unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+ Type *DestIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ // Recursively handle this integer conversion, if possible.
+ C = FoldBitCast(C, DestIVTy, DL);
+
+ // Finally, IR can handle this now that #elts line up.
+ return ConstantExpr::getBitCast(C, DestTy);
+ }
+
+ // Okay, we know the destination is integer, if the input is FP, convert
+ // it to integer first.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ // If IR wasn't able to fold it, bail out.
+ if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
+ !isa<ConstantDataVector>(C))
+ return C;
+ }
+
+ // Now we know that the input and output vectors are both integer vectors
+ // of the same size, and that their #elements is not the same. Do the
+ // conversion here, which depends on whether the input or output has
+ // more elements.
+ bool isLittleEndian = DL.isLittleEndian();
+
+ SmallVector<Constant*, 32> Result;
+ if (NumDstElt < NumSrcElt) {
+ // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+ Constant *Zero = Constant::getNullValue(DstEltTy);
+ unsigned Ratio = NumSrcElt/NumDstElt;
+ unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+ unsigned SrcElt = 0;
+ for (unsigned i = 0; i != NumDstElt; ++i) {
+ // Build each element of the result.
+ Constant *Elt = Zero;
+ unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ // Zero extend the element to the right size.
+ Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+ // Shift it to the right place, depending on endianness.
+ Src = ConstantExpr::getShl(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+ // Mix it in.
+ Elt = ConstantExpr::getOr(Elt, Src);
+ }
+ Result.push_back(Elt);
+ }
+ return ConstantVector::get(Result);
+ }
+
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+
+ // Truncate the element to an integer with the same pointer size and
+      // convert the element back to a pointer using an inttoptr.
+ if (DstEltTy->isPointerTy()) {
+ IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
+ Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
+ Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
+ continue;
+ }
+
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
+ }
+ }
+
+ return ConstantVector::get(Result);
+}
+
+
+/// If this constant is a constant offset from a global, return the global and
+/// the constant. Because of constantexprs, this function is recursive.
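+/// For example (a sketch; @a is hypothetical), given the constant
+///   getelementptr ([5 x i32], [5 x i32]* @a, i32 0, i32 3)
+/// this sets GV to @a and Offset to 12 (three 4-byte elements).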
+static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
+ APInt &Offset, const DataLayout &DL) {
+ // Trivial case, constant is the global.
+ if ((GV = dyn_cast<GlobalValue>(C))) {
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType());
+ Offset = APInt(BitWidth, 0);
+ return true;
+ }
+
+ // Otherwise, if this isn't a constant expr, bail out.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE) return false;
+
+ // Look through ptr->int and ptr->ptr casts.
+ if (CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::BitCast)
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
+
+ // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
+ GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
+ if (!GEP)
+ return false;
+
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
+ APInt TmpOffset(BitWidth, 0);
+
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ if (!GEP->accumulateConstantOffset(DL, TmpOffset))
+ return false;
+
+ Offset = TmpOffset;
+ return true;
+}
+
+/// Recursive helper to read bits out of a global. C is the constant being
+/// read from. ByteOffset is an offset into C. CurPtr is the pointer to copy
+/// results into, and BytesLeft is the number of bytes left in the CurPtr
+/// buffer. DL is the DataLayout.
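+/// For instance, reading an 'i32 258' initializer on a little-endian target
+/// writes the bytes { 0x02, 0x01, 0x00, 0x00 } into CurPtr (an illustrative
+/// case; the function also handles FP, struct, and array initializers below).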
+static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
+ unsigned char *CurPtr, unsigned BytesLeft,
+ const DataLayout &DL) {
+ assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
+ "Out of range access");
+
+ // If this element is zero or undefined, we can just return since *CurPtr is
+ // zero initialized.
+ if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ return true;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getBitWidth() > 64 ||
+ (CI->getBitWidth() & 7) != 0)
+ return false;
+
+ uint64_t Val = CI->getZExtValue();
+ unsigned IntBytes = unsigned(CI->getBitWidth()/8);
+
+ for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
+ int n = ByteOffset;
+ if (!DL.isLittleEndian())
+ n = IntBytes - n - 1;
+ CurPtr[i] = (unsigned char)(Val >> (n * 8));
+ ++ByteOffset;
+ }
+ return true;
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isDoubleTy()) {
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
+ }
+ if (CFP->getType()->isFloatTy()){
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
+ }
+ if (CFP->getType()->isHalfTy()){
+ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
+ }
+ return false;
+ }
+
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+ const StructLayout *SL = DL.getStructLayout(CS->getType());
+ unsigned Index = SL->getElementContainingOffset(ByteOffset);
+ uint64_t CurEltOffset = SL->getElementOffset(Index);
+ ByteOffset -= CurEltOffset;
+
+ while (1) {
+ // If the element access is to the element itself and not to tail padding,
+ // read the bytes from the element.
+ uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
+
+ if (ByteOffset < EltSize &&
+ !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
+ BytesLeft, DL))
+ return false;
+
+ ++Index;
+
+      // Check to see if we read from the last struct element; if so, we're done.
+ if (Index == CS->getType()->getNumElements())
+ return true;
+
+ // If we read all of the bytes we needed from this element we're done.
+ uint64_t NextEltOffset = SL->getElementOffset(Index);
+
+ if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
+ return true;
+
+ // Move to the next element of the struct.
+ CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
+ BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
+ ByteOffset = 0;
+ CurEltOffset = NextEltOffset;
+ }
+ // not reached.
+ }
+
+ if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
+ isa<ConstantDataSequential>(C)) {
+ Type *EltTy = C->getType()->getSequentialElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ uint64_t Index = ByteOffset / EltSize;
+ uint64_t Offset = ByteOffset - Index * EltSize;
+ uint64_t NumElts;
+ if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = C->getType()->getVectorNumElements();
+
+ for (; Index != NumElts; ++Index) {
+ if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
+ BytesLeft, DL))
+ return false;
+
+ uint64_t BytesWritten = EltSize - Offset;
+ assert(BytesWritten <= EltSize && "Not indexing into this element?");
+ if (BytesWritten >= BytesLeft)
+ return true;
+
+ Offset = 0;
+ BytesLeft -= BytesWritten;
+ CurPtr += BytesWritten;
+ }
+ return true;
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr &&
+ CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, DL);
+ }
+ }
+
+ // Otherwise, unknown initializer type.
+ return false;
+}
+
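+/// Fold a load from a constant pointer by reinterpreting the pointee's raw
+/// bytes as the loaded type. For example (hypothetical @g), loading i32
+/// through 'bitcast (float* @g to i32*)' can fold to the integer whose bytes
+/// match @g's float initializer.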
+static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
+ const DataLayout &DL) {
+ PointerType *PTy = cast<PointerType>(C->getType());
+ Type *LoadTy = PTy->getElementType();
+ IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
+
+ // If this isn't an integer load we can't fold it directly.
+ if (!IntType) {
+ unsigned AS = PTy->getAddressSpace();
+
+ // If this is a float/double load, we can try folding it as an int32/64 load
+ // and then bitcast the result. This can be useful for union cases. Note
+    // that address spaces don't matter here since we're not going to produce
+    // an actual new load.
+ Type *MapTy;
+ if (LoadTy->isHalfTy())
+ MapTy = Type::getInt16PtrTy(C->getContext(), AS);
+ else if (LoadTy->isFloatTy())
+ MapTy = Type::getInt32PtrTy(C->getContext(), AS);
+ else if (LoadTy->isDoubleTy())
+ MapTy = Type::getInt64PtrTy(C->getContext(), AS);
+ else if (LoadTy->isVectorTy()) {
+ MapTy = PointerType::getIntNPtrTy(C->getContext(),
+ DL.getTypeAllocSizeInBits(LoadTy), AS);
+ } else
+ return nullptr;
+
+ C = FoldBitCast(C, MapTy, DL);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, DL))
+ return FoldBitCast(Res, LoadTy, DL);
+ return nullptr;
+ }
+
+ unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
+ if (BytesLoaded > 32 || BytesLoaded == 0)
+ return nullptr;
+
+ GlobalValue *GVal;
+ APInt Offset;
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, DL))
+ return nullptr;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ !GV->getInitializer()->getType()->isSized())
+ return nullptr;
+
+  // If we're loading off the beginning of the global (i.e. the offset is
+  // negative), some of the loaded bytes might still land inside it, but we
+  // don't try to handle this.
+ if (Offset.isNegative())
+ return nullptr;
+
+ // If we're not accessing anything in this constant, the result is undefined.
+ if (Offset.getZExtValue() >=
+ DL.getTypeAllocSize(GV->getInitializer()->getType()))
+ return UndefValue::get(IntType);
+
+ unsigned char RawBytes[32] = {0};
+ if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
+ BytesLoaded, DL))
+ return nullptr;
+
+ APInt ResultVal = APInt(IntType->getBitWidth(), 0);
+ if (DL.isLittleEndian()) {
+ ResultVal = RawBytes[BytesLoaded - 1];
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[BytesLoaded - 1 - i];
+ }
+ } else {
+ ResultVal = RawBytes[0];
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
+ ResultVal <<= 8;
+ ResultVal |= RawBytes[i];
+ }
+ }
+
+ return ConstantInt::get(IntType->getContext(), ResultVal);
+}
+
+static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
+ const DataLayout &DL) {
+ auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
+ if (!DestPtrTy)
+ return nullptr;
+ Type *DestTy = DestPtrTy->getElementType();
+
+ Constant *C = ConstantFoldLoadFromConstPtr(CE->getOperand(0), DL);
+ if (!C)
+ return nullptr;
+
+ do {
+ Type *SrcTy = C->getType();
+
+ // If the type sizes are the same and a cast is legal, just directly
+ // cast the constant.
+ if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
+ Instruction::CastOps Cast = Instruction::BitCast;
+ // If we are going from a pointer to int or vice versa, we spell the cast
+ // differently.
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ Cast = Instruction::IntToPtr;
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ Cast = Instruction::PtrToInt;
+
+ if (CastInst::castIsValid(Cast, C, DestTy))
+ return ConstantExpr::getCast(Cast, C, DestTy);
+ }
+
+ // If this isn't an aggregate type, there is nothing we can do to drill down
+ // and find a bitcastable constant.
+ if (!SrcTy->isAggregateType())
+ return nullptr;
+
+ // We're simulating a load through a pointer that was bitcast to point to
+ // a different type, so we can try to walk down through the initial
+    // elements of an aggregate to see if some part of the aggregate is
+ // castable to implement the "load" semantic model.
+ C = C->getAggregateElement(0u);
+ } while (C);
+
+ return nullptr;
+}
+
+/// Return the value that a load from C would produce if it is constant and
+/// determinable. If this is not determinable, return null.
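+/// For example, a load of @g where '@g = constant i32 42' (a hypothetical
+/// global) folds to 'i32 42'; GEPs, bitcasts, and constant strings are
+/// handled below.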
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
+ const DataLayout &DL) {
+ // First, try the easy cases:
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ return GV->getInitializer();
+
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ if (GA->getAliasee() && !GA->mayBeOverridden())
+ return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL);
+
+ // If the loaded value isn't a constant expr, we can't handle it.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return nullptr;
+
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ if (Constant *V =
+ ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
+ return V;
+ }
+ }
+ }
+
+ if (CE->getOpcode() == Instruction::BitCast)
+ if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, DL))
+ return LoadedC;
+
+  // Instead of loading a constant C string, use the corresponding integer
+  // value directly if the string length is small enough.
+ StringRef Str;
+ if (getConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.size();
+ Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ // Replace load with immediate integer if the result is an integer or fp
+ // value.
+ if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+ (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
+ APInt StrVal(NumBits, 0);
+ APInt SingleChar(NumBits, 0);
+ if (DL.isLittleEndian()) {
+ for (signed i = StrLen-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < StrLen; i++) {
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+      // Append the NUL terminator at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+
+ Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+ if (Ty->isFloatingPointTy())
+ Res = ConstantExpr::getBitCast(Res, Ty);
+ return Res;
+ }
+ }
+
+ // If this load comes from anywhere in a constant global, and if the global
+ // is all undef or zero, we know what it loads.
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+ Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+ if (GV->getInitializer()->isNullValue())
+ return Constant::getNullValue(ResTy);
+ if (isa<UndefValue>(GV->getInitializer()))
+ return UndefValue::get(ResTy);
+ }
+ }
+
+ // Try hard to fold loads from bitcasted strange and non-type-safe things.
+ return FoldReinterpretLoadFromConstPtr(CE, DL);
+}
+
+static Constant *ConstantFoldLoadInst(const LoadInst *LI,
+ const DataLayout &DL) {
+ if (LI->isVolatile()) return nullptr;
+
+ if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
+ return ConstantFoldLoadFromConstPtr(C, DL);
+
+ return nullptr;
+}
+
+/// One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together, using the target data information in DL.
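+/// For example (a sketch; @g is hypothetical), masking a constant expression
+/// such as 'shl (ptrtoint i8* @g to i32), 16' with 255 folds to 'i32 0':
+/// the shift's low 16 bits are known zero and the mask clears the rest, so
+/// every result bit is known.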
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+ Constant *Op1,
+ const DataLayout &DL) {
+ // SROA
+
+ // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+ // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+ // bits.
+
+ if (Opc == Instruction::And) {
+ unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
+ APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
+ APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
+ computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
+ computeKnownBits(Op1, KnownZero1, KnownOne1, DL);
+ if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
+ // All the bits of Op0 that the 'and' could be masking are already zero.
+ return Op0;
+ }
+ if ((KnownOne0 | KnownZero1).isAllOnesValue()) {
+ // All the bits of Op1 that the 'and' could be masking are already zero.
+ return Op1;
+ }
+
+ APInt KnownZero = KnownZero0 | KnownZero1;
+ APInt KnownOne = KnownOne0 & KnownOne1;
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ return ConstantInt::get(Op0->getType(), KnownOne);
+ }
+ }
+
+ // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+ // constant. This happens frequently when iterating over a global array.
+ if (Opc == Instruction::Sub) {
+ GlobalValue *GV1, *GV2;
+ APInt Offs1, Offs2;
+
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
+ unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
+
+ // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+        // PtrToInt may change the bitwidth, so we have to convert to the
+        // right size first.
+ return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
+ Offs2.zextOrTrunc(OpSize));
+ }
+ }
+
+ return nullptr;
+}
+
+/// If array indices are not pointer-sized integers, explicitly cast them so
+/// that they aren't implicitly casted by the getelementptr.
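+/// For example, with 64-bit pointers an index written as 'i32 1' is recast
+/// to 'i64 1' so that the offset arithmetic that follows is done at pointer
+/// width (an illustrative case, assuming the default data layout).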
+static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Type *IntPtrTy = DL.getIntPtrType(ResultTy);
+
+ bool Any = false;
+ SmallVector<Constant*, 32> NewIdxs;
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+ if ((i == 1 ||
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(Ops[0]->getType()->getScalarType())
+ ->getElementType(),
+ Ops.slice(1, i - 1)))) &&
+ Ops[i]->getType() != IntPtrTy) {
+ Any = true;
+ NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+ true,
+ IntPtrTy,
+ true),
+ Ops[i], IntPtrTy));
+ } else
+ NewIdxs.push_back(Ops[i]);
+ }
+
+ if (!Any)
+ return nullptr;
+
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
+ C = Folded;
+ }
+
+ return C;
+}
+
+/// Strip the pointer casts, but preserve the address space information.
+static Constant* StripPtrCastKeepAS(Constant* Ptr) {
+ assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
+ PointerType *OldPtrTy = cast<PointerType>(Ptr->getType());
+ Ptr = Ptr->stripPointerCasts();
+ PointerType *NewPtrTy = cast<PointerType>(Ptr->getType());
+
+ // Preserve the address space number of the pointer.
+ if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
+ NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
+ OldPtrTy->getAddressSpace());
+ Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy);
+ }
+ return Ptr;
+}
+
+/// If we can symbolically evaluate the GEP constant expression, do so.
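+/// For instance (a sketch with a hypothetical @a), the nested expression
+///   getelementptr (i32, i32* getelementptr ([4 x i32], [4 x i32]* @a,
+///                                           i64 0, i64 1), i64 2)
+/// accumulates a byte offset of 12 and re-forms as '@a, i64 0, i64 3'.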
+static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ Constant *Ptr = Ops[0];
+ if (!Ptr->getType()->getPointerElementType()->isSized() ||
+ !Ptr->getType()->isPointerTy())
+ return nullptr;
+
+ Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
+ Type *ResultElementTy = ResultTy->getPointerElementType();
+
+ // If this is a constant expr gep that is effectively computing an
+ // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((!CE || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+ if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+ Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
+ return Res;
+ }
+ }
+ return nullptr;
+ }
+
+ unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
+ APInt Offset =
+ APInt(BitWidth,
+ DL.getIndexedOffset(
+ Ptr->getType(),
+ makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
+ Ptr = StripPtrCastKeepAS(Ptr);
+
+ // If this is a GEP of a GEP, fold it all into a single GEP.
+ while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
+
+    // Do not try to incorporate the sub-GEP if some index is not a number.
+ bool AllConstantInt = true;
+ for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+ if (!isa<ConstantInt>(NestedOps[i])) {
+ AllConstantInt = false;
+ break;
+ }
+ if (!AllConstantInt)
+ break;
+
+ Ptr = cast<Constant>(GEP->getOperand(0));
+ Offset += APInt(BitWidth, DL.getIndexedOffset(Ptr->getType(), NestedOps));
+ Ptr = StripPtrCastKeepAS(Ptr);
+ }
+
+ // If the base value for this address is a literal integer value, fold the
+  // getelementptr to the resulting integer value cast to the pointer type.
+ APInt BasePtr(BitWidth, 0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ }
+ }
+
+ if (Ptr->isNullValue() || BasePtr != 0) {
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
+ return ConstantExpr::getIntToPtr(C, ResultTy);
+ }
+
+ // Otherwise form a regular getelementptr. Recompute the indices so that
+ // we eliminate over-indexing of the notional static type array bounds.
+ // This makes it easy to determine if the getelementptr is "inbounds".
+ // Also, this helps GlobalOpt do SROA on GlobalVariables.
+ Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
+ SmallVector<Constant *, 32> NewIdxs;
+
+ do {
+ if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
+ if (ATy->isPointerTy()) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (!NewIdxs.empty())
+ break;
+
+ // Only handle pointers to sized types, not pointers to functions.
+ if (!ATy->getElementType()->isSized())
+ return nullptr;
+ }
+
+ // Determine which element of the array the offset points into.
+ APInt ElemSize(BitWidth, DL.getTypeAllocSize(ATy->getElementType()));
+ if (ElemSize == 0)
+ // The element size is 0. This may be [0 x Ty]*, so just use a zero
+ // index for this level and proceed to the next level to see if it can
+ // accommodate the offset.
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ else {
+        // The element size is non-zero; divide the offset by the element
+        // size (rounding down) to compute the index at this level.
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ }
+ Ty = ATy->getElementType();
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ // If we end up with an offset that isn't valid for this struct type, we
+ // can't re-form this GEP in a regular form, so bail out. The pointer
+ // operand likely went through casts that are necessary to make the GEP
+ // sensible.
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ if (Offset.uge(SL.getSizeInBytes()))
+ break;
+
+ // Determine which field of the struct the offset points into. The
+ // getZExtValue is fine as we've already ensured that the offset is
+ // within the range representable by the StructLayout API.
+ unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
+ Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
+ Ty = STy->getTypeAtIndex(ElIdx);
+ } else {
+ // We've reached some non-indexable type.
+ break;
+ }
+ } while (Ty != ResultElementTy);
+
+ // If we haven't used up the entire offset by descending the static
+ // type, then the offset is pointing into the middle of an indivisible
+ // member, so we can't simplify it.
+ if (Offset != 0)
+ return nullptr;
+
+ // Create a GEP.
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs);
+ assert(C->getType()->getPointerElementType() == Ty &&
+ "Computed GetElementPtr has unexpected type!");
+
+ // If we ended up indexing a member with a type that doesn't match
+ // the type of what the original indices indexed, add a cast.
+ if (Ty != ResultElementTy)
+ C = FoldBitCast(C, ResultTy, DL);
+
+ return C;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding public APIs
+//===----------------------------------------------------------------------===//
+
+/// Try to constant fold the specified instruction.
+/// If successful, the constant result is returned; if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ // Handle PHI nodes quickly here...
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Constant *CommonValue = nullptr;
+
+ for (Value *Incoming : PN->incoming_values()) {
+ // If the incoming value is undef then skip it. Note that while we could
+      // skip the value if it is equal to the phi node itself, we choose not to
+ // because that would break the rule that constant folding only applies if
+ // all operands are constants.
+ if (isa<UndefValue>(Incoming))
+ continue;
+ // If the incoming value is not a constant, then give up.
+ Constant *C = dyn_cast<Constant>(Incoming);
+ if (!C)
+ return nullptr;
+ // Fold the PHI's operands.
+ if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(NewC, DL, TLI);
+ // If the incoming value is a different constant to
+ // the one we saw previously, then give up.
+ if (CommonValue && C != CommonValue)
+ return nullptr;
+ CommonValue = C;
+ }
+
+
+ // If we reach here, all incoming values are the same constant or undef.
+ return CommonValue ? CommonValue : UndefValue::get(PN->getType());
+ }
+
+  // Scan the operand list, checking to see if they are all constants; if so,
+ // hand off to ConstantFoldInstOperands.
+ SmallVector<Constant*, 8> Ops;
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+ Constant *Op = dyn_cast<Constant>(*i);
+ if (!Op)
+ return nullptr; // All operands not constant!
+
+ // Fold the Instruction's operands.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
+ Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
+
+ Ops.push_back(Op);
+ }
+
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ DL, TLI);
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return ConstantFoldLoadInst(LI, DL);
+
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
+ return ConstantExpr::getInsertValue(
+ cast<Constant>(IVI->getAggregateOperand()),
+ cast<Constant>(IVI->getInsertedValueOperand()),
+ IVI->getIndices());
+ }
+
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) {
+ return ConstantExpr::getExtractValue(
+ cast<Constant>(EVI->getAggregateOperand()),
+ EVI->getIndices());
+ }
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, DL, TLI);
+}
+
+static Constant *
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
+ SmallVector<Constant *, 8> Ops;
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
+ ++i) {
+ Constant *NewC = cast<Constant>(*i);
+ // Recursively fold the ConstantExpr's operands. If we have already folded
+ // a ConstantExpr, we don't have to process it again.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
+ if (FoldedOps.insert(NewCE).second)
+ NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
+ }
+ Ops.push_back(NewC);
+ }
+
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ DL, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, DL, TLI);
+}
+
+/// Attempt to fold the constant expression
+/// using the specified DataLayout. If successful, the constant result is
+/// returned; if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ SmallPtrSet<ConstantExpr *, 4> FoldedOps;
+ return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
+}
+
+/// Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned; if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
+ ArrayRef<Constant *> Ops,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ // Handle easy binops first.
+ if (Instruction::isBinaryOp(Opcode)) {
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], DL))
+ return C;
+ }
+
+ return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+ }
+
+ switch (Opcode) {
+ default: return nullptr;
+ case Instruction::ICmp:
+ case Instruction::FCmp: llvm_unreachable("Invalid for compares");
+ case Instruction::Call:
+ if (Function *F = dyn_cast<Function>(Ops.back()))
+ if (canConstantFoldCallTo(F))
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
+ return nullptr;
+ case Instruction::PtrToInt:
+    // If the input is an inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ Constant *Input = CE->getOperand(0);
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
+ unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
+ if (PtrWidth < InWidth) {
+ Constant *Mask =
+ ConstantInt::get(CE->getContext(),
+ APInt::getLowBitsSet(InWidth, PtrWidth));
+ Input = ConstantExpr::getAnd(Input, Mask);
+ }
+ // Do a zext or trunc to get to the dest size.
+ return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ }
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::IntToPtr:
+ // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
+ // the int size is >= the ptr size and the address spaces are the same.
+ // This requires knowing the width of a pointer, so it can't be done in
+ // ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (CE->getOpcode() == Instruction::PtrToInt) {
+ Constant *SrcPtr = CE->getOperand(0);
+ unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
+
+ if (MidIntSize >= SrcPtrSize) {
+ unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
+ if (SrcAS == DestTy->getPointerAddressSpace())
+ return FoldBitCast(CE->getOperand(0), DestTy, DL);
+ }
+ }
+ }
+
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::AddrSpaceCast:
+ return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
+ case Instruction::BitCast:
+ return FoldBitCast(Ops[0], DestTy, DL);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr: {
+ Type *SrcTy = nullptr;
+ if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI))
+ return C;
+ if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI))
+ return C;
+
+ return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1));
+ }
+ }
+}
+
+/// Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
+Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
+ Constant *Ops0, Constant *Ops1,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ // fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
+ // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
+ //
+  // FIXME: The following comment is out of date, and the DataLayout is here now.
+ // ConstantExpr::getCompare cannot do this, because it doesn't have DL
+ // around to know if bit truncation is happening.
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (Ops1->isNullValue()) {
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
+ }
+
+      // Only do this transformation if the int is IntPtrTy in size; otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
+ }
+ }
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
+ if (CE0->getOpcode() == CE1->getOpcode()) {
+ if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
+
+ // Convert the integer value to the right size to ensure we get the
+ // proper extension or truncation.
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
+ IntPtrTy, false);
+ Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
+ IntPtrTy, false);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
+ }
+
+        // Only do this transformation if the int is IntPtrTy in size; otherwise
+ // there is a truncation or extension that we aren't modeling.
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
+ return ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
+ }
+ }
+ }
+ }
+
+ // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+ // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+ if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+ CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+ Constant *LHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), Ops1, DL, TLI);
+ Constant *RHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(1), Ops1, DL, TLI);
+ unsigned OpC =
+ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Constant *Ops[] = { LHS, RHS };
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, DL, TLI);
+ }
+ }
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
+}
+
+
+/// Given a constant and a getelementptr constantexpr, return the constant value
+/// being addressed by the constant expression, or null if something is funny
+/// and we can't decide.
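+/// For example (illustrative), given '@s = constant {i32, i32} {i32 1, i32 2}',
+/// the expression 'getelementptr ({i32, i32}, {i32, i32}* @s, i32 0, i32 1)'
+/// addresses the constant 'i32 2'.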
+Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
+ ConstantExpr *CE) {
+ if (!CE->getOperand(1)->isNullValue())
+ return nullptr; // Do not allow stepping over the value!
+
+ // Loop over all of the operands, tracking down which value we are
+ // addressing.
+ for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
+ C = C->getAggregateElement(CE->getOperand(i));
+ if (!C)
+ return nullptr;
+ }
+ return C;
+}
+
+/// Given a constant and getelementptr indices (with an *implied* zero pointer
+/// index that is not in the list), return the constant value being addressed by
+/// a virtual load, or null if something is funny and we can't decide.
+Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
+ ArrayRef<Constant*> Indices) {
+ // Loop over all of the operands, tracking down which value we are
+ // addressing.
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ C = C->getAggregateElement(Indices[i]);
+ if (!C)
+ return nullptr;
+ }
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Folding for Calls
+//
+
+/// Return true if it's even possible to fold a call to the specified function.
+bool llvm::canConstantFoldCallTo(const Function *F) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::copysign:
+ case Intrinsic::round:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ return true;
+ default:
+ return false;
+ case 0: break;
+ }
+
+ if (!F->hasName())
+ return false;
+ StringRef Name = F->getName();
+
+  // In these cases, checking the length is required. We don't want to return
+  // true for a name like "cos\0blah", which strcmp would consider equal to
+  // "cos" but which has length 8.
+ switch (Name[0]) {
+ default:
+ return false;
+ case 'a':
+ return Name == "acos" || Name == "asin" || Name == "atan" ||
+ Name == "atan2" || Name == "acosf" || Name == "asinf" ||
+ Name == "atanf" || Name == "atan2f";
+ case 'c':
+ return Name == "ceil" || Name == "cos" || Name == "cosh" ||
+ Name == "ceilf" || Name == "cosf" || Name == "coshf";
+ case 'e':
+ return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f";
+ case 'f':
+ return Name == "fabs" || Name == "floor" || Name == "fmod" ||
+ Name == "fabsf" || Name == "floorf" || Name == "fmodf";
+ case 'l':
+ return Name == "log" || Name == "log10" || Name == "logf" ||
+ Name == "log10f";
+ case 'p':
+ return Name == "pow" || Name == "powf";
+ case 's':
+ return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
+ Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
+ case 't':
+ return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
+ }
+}
+
+static Constant *GetConstantFoldFPValue(double V, Type *Ty) {
+ if (Ty->isHalfTy()) {
+ APFloat APF(V);
+ bool unused;
+ APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
+ if (Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
+ if (Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
+ llvm_unreachable("Can only constant fold half/float/double");
+
+}
+
+namespace {
+/// Clear the floating-point exception state.
+static inline void llvm_fenv_clearexcept() {
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
+ feclearexcept(FE_ALL_EXCEPT);
+#endif
+ errno = 0;
+}
+
+/// Test if a floating-point exception was raised.
+static inline bool llvm_fenv_testexcept() {
+ int errno_val = errno;
+ if (errno_val == ERANGE || errno_val == EDOM)
+ return true;
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
+ if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
+ return true;
+#endif
+ return false;
+}
+} // End namespace
+
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+ Type *Ty) {
+ llvm_fenv_clearexcept();
+ V = NativeFP(V);
+ if (llvm_fenv_testexcept()) {
+ llvm_fenv_clearexcept();
+ return nullptr;
+ }
+
+ return GetConstantFoldFPValue(V, Ty);
+}
+
+static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
+ double V, double W, Type *Ty) {
+ llvm_fenv_clearexcept();
+ V = NativeFP(V, W);
+ if (llvm_fenv_testexcept()) {
+ llvm_fenv_clearexcept();
+ return nullptr;
+ }
+
+ return GetConstantFoldFPValue(V, Ty);
+}
+
+/// Attempt to fold an SSE floating point to integer conversion of a constant
+/// floating point. If roundTowardZero is false, the default IEEE rounding is
+/// used (toward nearest, ties to even). This matches the behavior of the
+/// non-truncating SSE instructions in the default rounding mode. The desired
+/// integer type Ty is used to select how many bits are available for the
+/// result. Returns null if the conversion cannot be performed, otherwise
+/// returns the Constant value resulting from the conversion.
+static Constant *ConstantFoldConvertToInt(const APFloat &Val,
+ bool roundTowardZero, Type *Ty) {
+  // All of these conversion intrinsics form an integer of at most 64 bits.
+  unsigned ResultWidth = Ty->getIntegerBitWidth();
+  assert(ResultWidth <= 64 &&
+         "Can only constant fold conversions to integers of at most 64 bits");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+  APFloat::roundingMode mode = roundTowardZero ? APFloat::rmTowardZero
+                                               : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return nullptr;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
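+
+// For example (illustrative), with Ty == i32:
+//   ConstantFoldConvertToInt(APFloat(2.7), /*roundTowardZero=*/false, Ty)
+// yields i32 3, while the truncating form yields i32 2; a value outside the
+// 32-bit signed range makes convertToInteger report opInvalidOp, so the
+// function returns nullptr.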
+
+static double getValueAsDouble(ConstantFP *Op) {
+ Type *Ty = Op->getType();
+
+ if (Ty->isFloatTy())
+ return Op->getValueAPF().convertToFloat();
+
+ if (Ty->isDoubleTy())
+ return Op->getValueAPF().convertToDouble();
+
+ bool unused;
+ APFloat APF = Op->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ return APF.convertToDouble();
+}
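+
+// Note that widening half or float to IEEEdouble is exact, so no rounding
+// error is introduced before the host-libm call; e.g. the half value 1.0
+// (bit pattern 0x3C00) converts to exactly 1.0.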
+
+static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
+ Type *Ty, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
+ if (Operands.size() == 1) {
+ if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (IntrinsicID == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
+
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+ return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
+ }
+
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
+
+ if (IntrinsicID == Intrinsic::round) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::floor) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::ceil) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::trunc) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::rint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ if (IntrinsicID == Intrinsic::nearbyint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+      // We only fold calls with finite arguments: folding NaN and inf is
+      // likely to abort with an exception anyway, and some host libms have
+      // known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return nullptr;
+
+      // APFloat versions of these functions do not exist yet, so we use the
+      // host's native double versions. The float versions are never called
+      // directly, but for all of these functions (float)(f((double)arg)) ==
+      // f(arg) holds. Long double is not supported yet.
+ double V = getValueAsDouble(Op);
+
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::fabs:
+ return ConstantFoldFP(fabs, V, Ty);
+ case Intrinsic::log2:
+ return ConstantFoldFP(Log2, V, Ty);
+ case Intrinsic::log:
+ return ConstantFoldFP(log, V, Ty);
+ case Intrinsic::log10:
+ return ConstantFoldFP(log10, V, Ty);
+ case Intrinsic::exp:
+ return ConstantFoldFP(exp, V, Ty);
+ case Intrinsic::exp2:
+ return ConstantFoldFP(exp2, V, Ty);
+ case Intrinsic::sin:
+ return ConstantFoldFP(sin, V, Ty);
+ case Intrinsic::cos:
+ return ConstantFoldFP(cos, V, Ty);
+ }
+
+ if (!TLI)
+ return nullptr;
+
+ switch (Name[0]) {
+ case 'a':
+ if ((Name == "acos" && TLI->has(LibFunc::acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc::acosf)))
+ return ConstantFoldFP(acos, V, Ty);
+ else if ((Name == "asin" && TLI->has(LibFunc::asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc::asinf)))
+ return ConstantFoldFP(asin, V, Ty);
+ else if ((Name == "atan" && TLI->has(LibFunc::atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc::atanf)))
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if ((Name == "ceil" && TLI->has(LibFunc::ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc::ceilf)))
+ return ConstantFoldFP(ceil, V, Ty);
+ else if ((Name == "cos" && TLI->has(LibFunc::cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc::cosf)))
+ return ConstantFoldFP(cos, V, Ty);
+ else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc::coshf)))
+ return ConstantFoldFP(cosh, V, Ty);
+ break;
+ case 'e':
+ if ((Name == "exp" && TLI->has(LibFunc::exp)) ||
+ (Name == "expf" && TLI->has(LibFunc::expf)))
+ return ConstantFoldFP(exp, V, Ty);
+ if ((Name == "exp2" && TLI->has(LibFunc::exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc::exp2f)))
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ break;
+ case 'f':
+ if ((Name == "fabs" && TLI->has(LibFunc::fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc::fabsf)))
+ return ConstantFoldFP(fabs, V, Ty);
+ else if ((Name == "floor" && TLI->has(LibFunc::floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc::floorf)))
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc::logf)))
+ return ConstantFoldFP(log, V, Ty);
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f)))
+ return ConstantFoldFP(log10, V, Ty);
+      // Note: the sqrt intrinsic is dispatched here because its name,
+      // "llvm.sqrt.*", starts with 'l'.
+      else if (IntrinsicID == Intrinsic::sqrt &&
+               (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
+ if (V >= -0.0)
+ return ConstantFoldFP(sqrt, V, Ty);
+ else {
+ // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which
+ // all guarantee or favor returning NaN - the square root of a
+ // negative number is not defined for the LLVM sqrt intrinsic.
+ // This is because the intrinsic should only be emitted in place of
+ // libm's sqrt function when using "no-nans-fp-math".
+ return UndefValue::get(Ty);
+ }
+ }
+ break;
+ case 's':
+ if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc::sinf)))
+ return ConstantFoldFP(sin, V, Ty);
+ else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc::sinhf)))
+ return ConstantFoldFP(sinh, V, Ty);
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)))
+ return ConstantFoldFP(sqrt, V, Ty);
+ break;
+ case 't':
+ if ((Name == "tan" && TLI->has(LibFunc::tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc::tanf)))
+ return ConstantFoldFP(tan, V, Ty);
+ else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc::tanhf)))
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
+ }
+ return nullptr;
+ }
+
+ if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (IntrinsicID) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(APFloat::IEEEhalf, Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status = Val.convert(
+ Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(Ty->getContext(), Val);
+ }
+ default:
+ return nullptr;
+ }
+ }
+
+  // Also accept ConstantVector, which is what we get instead of a
+  // ConstantDataVector when the vector contains an undef element.
+ if (isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) {
+ Constant *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+                                        /*roundTowardZero=*/false, Ty);
+      break; // Do not fall through to the truncating conversions below.
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty);
+ }
+ }
+
+ if (isa<UndefValue>(Operands[0])) {
+ if (IntrinsicID == Intrinsic::bswap)
+ return Operands[0];
+ return nullptr;
+ }
+
+ return nullptr;
+ }
+
+ if (Operands.size() == 2) {
+ if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
+ double Op1V = getValueAsDouble(Op1);
+
+ if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return nullptr;
+
+ double Op2V = getValueAsDouble(Op2);
+ if (IntrinsicID == Intrinsic::pow) {
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ }
+ if (IntrinsicID == Intrinsic::copysign) {
+ APFloat V1 = Op1->getValueAPF();
+ APFloat V2 = Op2->getValueAPF();
+ V1.copySign(V2);
+ return ConstantFP::get(Ty->getContext(), V1);
+ }
+
+ if (IntrinsicID == Intrinsic::minnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
+ }
+
+ if (IntrinsicID == Intrinsic::maxnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
+ }
+
+ if (!TLI)
+ return nullptr;
+ if ((Name == "pow" && TLI->has(LibFunc::pow)) ||
+ (Name == "powf" && TLI->has(LibFunc::powf)))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if ((Name == "fmod" && TLI->has(LibFunc::fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc::fmodf)))
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if ((Name == "atan2" && TLI->has(LibFunc::atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc::atan2f)))
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+      } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+        if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) {
+          // Compute in float, then round back to half so the folded constant
+          // has the same type as the call.
+          APFloat Res((float)std::pow((float)Op1V,
+                                      (int)Op2C->getZExtValue()));
+          bool Unused;
+          Res.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven,
+                      &Unused);
+          return ConstantFP::get(Ty->getContext(), Res);
+        }
+        if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
+          return ConstantFP::get(Ty->getContext(),
+                                 APFloat((float)std::pow((float)Op1V,
+                                                 (int)Op2C->getZExtValue())));
+        if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
+          return ConstantFP::get(Ty->getContext(),
+                                 APFloat(std::pow(Op1V,
+                                                  (int)Op2C->getZExtValue())));
+      }
+ return nullptr;
+ }
+
+ if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ APInt Res;
+ bool Overflow;
+ switch (IntrinsicID) {
+ default: llvm_unreachable("Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::smul_with_overflow:
+ Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+ break;
+ }
+ Constant *Ops[] = {
+ ConstantInt::get(Ty->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ }
+ case Intrinsic::cttz:
+ if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
+ return UndefValue::get(Ty);
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
+ return UndefValue::get(Ty);
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
+ }
+ }
+
+ return nullptr;
+ }
+ return nullptr;
+ }
+
+ if (Operands.size() != 3)
+ return nullptr;
+
+ if (const ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (const ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (const ConstantFP *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd: {
+ APFloat V = Op1->getValueAPF();
+ APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(),
+ Op3->getValueAPF(),
+ APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return ConstantFP::get(Ty->getContext(), V);
+
+ return nullptr;
+ }
+ }
+ }
+ }
+ }
+
+ return nullptr;
+}
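+
+// End-to-end example (illustrative IR): given
+//   %r = call double @llvm.fma.f64(double 2.0, double 3.0, double 0.5)
+// the three-operand path above folds the call to the ConstantFP 6.5 via
+// APFloat::fusedMultiplyAdd with round-to-nearest-even.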
+
+static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
+ VectorType *VTy,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Constant *, 4> Result(VTy->getNumElements());
+ SmallVector<Constant *, 4> Lane(Operands.size());
+ Type *Ty = VTy->getElementType();
+
+ for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
+ // Gather a column of constants.
+ for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
+ Constant *Agg = Operands[J]->getAggregateElement(I);
+ if (!Agg)
+ return nullptr;
+
+ Lane[J] = Agg;
+ }
+
+ // Use the regular scalar folding to simplify this column.
+ Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI);
+ if (!Folded)
+ return nullptr;
+ Result[I] = Folded;
+ }
+
+ return ConstantVector::get(Result);
+}
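+
+// For example (illustrative): folding
+//   call <2 x float> @llvm.fabs.v2f32(<2 x float> <float -1.0, float 2.0>)
+// gathers one lane at a time, folds fabs on each scalar through
+// ConstantFoldScalarCall, and rebuilds the vector <float 1.0, float 2.0>.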
+
+/// Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
+ if (!F->hasName())
+ return nullptr;
+ StringRef Name = F->getName();
+
+ Type *Ty = F->getReturnType();
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, TLI);
+
+ return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
+}
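+
+// Caller-side sketch (illustrative; CI is some CallInst whose arguments all
+// turned out to be constants, collected in Args):
+//   if (Constant *C = ConstantFoldCall(CI->getCalledFunction(), Args, TLI)) {
+//     CI->replaceAllUsesWith(C);  // the call folded away
+//     CI->eraseFromParent();
+//   }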
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
new file mode 100644
index 0000000..0383cbf
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -0,0 +1,533 @@
+//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the cost model analysis. It provides a very basic cost
+// estimation for LLVM-IR. This analysis uses the services of the codegen
+// to approximate the cost of any IR instruction when lowered to machine
+// instructions. The cost results are unit-less and the cost number represents
+// the throughput of the machine assuming that all loads hit the cache, all
+// branches are predicted, etc. The cost numbers can be added in order to
+// compare two or more transformation alternatives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define CM_NAME "cost-model"
+#define DEBUG_TYPE CM_NAME
+
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+ cl::Hidden,
+ cl::desc("Recognize reduction patterns."));
+
+namespace {
+ class CostModelAnalysis : public FunctionPass {
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
+ initializeCostModelAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ /// Returns the expected cost of the instruction.
+    /// Returns -1 (as an unsigned value, i.e. UINT_MAX) if the cost is
+    /// unknown.
+ /// Note, this method does not cache the cost calculation and it
+ /// can be expensive in some cases.
+ unsigned getInstructionCost(const Instruction *I) const;
+
+ private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ void print(raw_ostream &OS, const Module*) const override;
+
+ /// The function that we analyze.
+ Function *F;
+ /// Target information.
+ const TargetTransformInfo *TTI;
+ };
+} // End of anonymous namespace
+
+// Register this pass.
+char CostModelAnalysis::ID = 0;
+static const char cm_name[] = "Cost Model Analysis";
+INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
+INITIALIZE_PASS_END(CostModelAnalysis, CM_NAME, cm_name, false, true)
+
+FunctionPass *llvm::createCostModelAnalysisPass() {
+ return new CostModelAnalysis();
+}
+
+void
+CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+bool
+CostModelAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
+
+ return false;
+}
+
+static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
+  for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
+    // Negative mask entries denote undef lanes, which match any position.
+    if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
+      return false;
+  return true;
+}
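+
+// Example (illustrative): for a 4-wide shuffle, <3, 2, 1, 0> and
+// <3, -1, 1, 0> (-1 encoding an undef lane) are reverse masks, while
+// <3, 1, 2, 0> is not.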
+
+static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
+ bool isAlternate = true;
+ unsigned MaskSize = Mask.size();
+
+ // Example: shufflevector A, B, <0,5,2,7>
+ for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
+ }
+
+ if (isAlternate)
+ return true;
+
+ isAlternate = true;
+ // Example: shufflevector A, B, <4,1,6,3>
+ for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+ }
+
+ return isAlternate;
+}
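+
+// Example (illustrative): for 4 elements, <0, 5, 2, 7> takes even lanes from
+// the first operand and odd lanes from the second, and <4, 1, 6, 3> is the
+// mirrored form; both are accepted as alternate masks.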
+
+static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
+ TargetTransformInfo::OperandValueKind OpInfo =
+ TargetTransformInfo::OK_AnyValue;
+
+ // Check for a splat of a constant or for a non uniform vector of constants.
+ if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+ OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
+ if (cast<Constant>(V)->getSplatValue() != nullptr)
+ OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ }
+
+ return OpInfo;
+}
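+
+// Example (illustrative): <i32 2, i32 2, i32 2, i32 2> is classified as
+// OK_UniformConstantValue, <i32 1, i32 2, i32 3, i32 4> as
+// OK_NonUniformConstantValue, and any non-constant value as OK_AnyValue.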
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+ unsigned Level) {
+ // We don't need a shuffle if we just want to have element 0 in position 0 of
+ // the vector.
+ if (!SI && Level == 0 && IsLeft)
+ return true;
+ else if (!SI)
+ return false;
+
+ SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+ // we look at the left or right side.
+ for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+ Mask[i] = val;
+
+ SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+ return Mask == ActualMask;
+}
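+
+// Example (illustrative): on a 4-wide vector at Level 1, the expected left
+// mask is <0, 2, undef, undef> and the right mask is <1, 3, undef, undef>;
+// at Level 0 the left shuffle may be omitted entirely.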
+
+static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
+ unsigned Level, unsigned NumLevels) {
+ // Match one level of pairwise operations.
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ if (BinOp == nullptr)
+ return false;
+
+ assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
+
+ unsigned Opcode = BinOp->getOpcode();
+ Value *L = BinOp->getOperand(0);
+ Value *R = BinOp->getOperand(1);
+
+ ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
+ if (!LS && Level)
+ return false;
+ ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
+ if (!RS && Level)
+ return false;
+
+ // On level 0 we can omit one shufflevector instruction.
+ if (!Level && !RS && !LS)
+ return false;
+
+ // Shuffle inputs must match.
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
+ Value *NextLevelOp = nullptr;
+ if (NextLevelOpR && NextLevelOpL) {
+ // If we have two shuffles their operands must match.
+ if (NextLevelOpL != NextLevelOpR)
+ return false;
+
+ NextLevelOp = NextLevelOpL;
+ } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+ // On the first level we can omit the shufflevector <0, undef,...>. So the
+ // input to the other shufflevector <1, undef> must match with one of the
+ // inputs to the current binary operation.
+ // Example:
+ // %NextLevelOpL = shufflevector %R, <1, undef ...>
+ // %BinOp = fadd %NextLevelOpL, %R
+ if (NextLevelOpL && NextLevelOpL != R)
+ return false;
+ else if (NextLevelOpR && NextLevelOpR != L)
+ return false;
+
+ NextLevelOp = NextLevelOpL ? R : L;
+ } else
+ return false;
+
+  // Check that the next level's binary operation exists and matches the
+  // current one.
+ BinaryOperator *NextLevelBinOp = nullptr;
+ if (Level + 1 != NumLevels) {
+ if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
+ return false;
+ else if (NextLevelBinOp->getOpcode() != Opcode)
+ return false;
+ }
+
+ // Shuffle mask for pairwise operation must match.
+ if (matchPairwiseShuffleMask(LS, true, Level)) {
+ if (!matchPairwiseShuffleMask(RS, false, Level))
+ return false;
+ } else if (matchPairwiseShuffleMask(RS, true, Level)) {
+ if (!matchPairwiseShuffleMask(LS, false, Level))
+ return false;
+ } else
+ return false;
+
+ if (++Level == NumLevels)
+ return true;
+
+ // Match next level.
+ return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
+}
+
+static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+  // We look for a sequence of shuffle, shuffle, add triples like the following
+ // that builds a pairwise reduction tree.
+ //
+ // (X0, X1, X2, X3)
+ // (X0 + X1, X2 + X3, undef, undef)
+ // ((X0 + X1) + (X2 + X3), undef, undef, undef)
+ //
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+ if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
+ return false;
+
+ Opcode = RdxStart->getOpcode();
+ Ty = VecTy;
+
+ return true;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(BinaryOperator *B) {
+
+ Value *L = B->getOperand(0);
+ Value *R = B->getOperand(1);
+ ShuffleVectorInst *S = nullptr;
+
+ if ((S = dyn_cast<ShuffleVectorInst>(L)))
+ return std::make_pair(R, S);
+
+ S = dyn_cast<ShuffleVectorInst>(R);
+ return std::make_pair(L, S);
+}
+
+static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+ unsigned RdxOpcode = RdxStart->getOpcode();
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+  // We look for a sequence of shuffles and adds like the following, matching
+  // one fadd/shufflevector pair at a time.
+ //
+ // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+ // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+ unsigned MaskStart = 1;
+ Value *RdxOp = RdxStart;
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ unsigned NumVecElemsRemain = NumVecElems;
+  while (NumVecElemsRemain > 1) {
+ // Check for the right reduction operation.
+ BinaryOperator *BinOp;
+ if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
+ return false;
+ if (BinOp->getOpcode() != RdxOpcode)
+ return false;
+
+ Value *NextRdxOp;
+ ShuffleVectorInst *Shuffle;
+ std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
+
+    // Check that the current reduction operation and the shuffle use the
+    // same value.
+ if (Shuffle == nullptr)
+ return false;
+ if (Shuffle->getOperand(0) != NextRdxOp)
+ return false;
+
+    // Check that the shuffle mask matches.
+ for (unsigned j = 0; j != MaskStart; ++j)
+ ShuffleMask[j] = MaskStart + j;
+ // Fill the rest of the mask with -1 for undef.
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ if (ShuffleMask != Mask)
+ return false;
+
+ RdxOp = NextRdxOp;
+ NumVecElemsRemain /= 2;
+ MaskStart *= 2;
+ }
+
+ Opcode = RdxOpcode;
+ Ty = VecTy;
+ return true;
+}
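+
+// Example (illustrative): walking the 4-wide sequence above upwards from the
+// extractelement, the loop first expects the mask <1, undef, undef, undef>
+// (MaskStart == 1) and then <2, 3, undef, undef> (MaskStart == 2).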
+
+unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
+ if (!TTI)
+ return -1;
+
+ switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:
+ return TTI->getUserCost(I);
+
+ case Instruction::Ret:
+ case Instruction::PHI:
+ case Instruction::Br: {
+ return TTI->getCFInstrCost(I->getOpcode());
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ getOperandInfo(I->getOperand(0));
+ TargetTransformInfo::OperandValueKind Op2VK =
+ getOperandInfo(I->getOperand(1));
+ return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+ Op2VK);
+ }
+ case Instruction::Select: {
+ const SelectInst *SI = cast<SelectInst>(I);
+ Type *CondTy = SI->getCondition()->getType();
+ return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ValTy = I->getOperand(0)->getType();
+ return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+ }
+ case Instruction::Store: {
+ const StoreInst *SI = cast<StoreInst>(I);
+ Type *ValTy = SI->getValueOperand()->getType();
+ return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
+ SI->getAlignment(),
+ SI->getPointerAddressSpace());
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(I);
+ return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
+ LI->getAlignment(),
+ LI->getPointerAddressSpace());
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast: {
+ Type *SrcTy = I->getOperand(0)->getType();
+ return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
+ }
+ case Instruction::ExtractElement: {
+    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+
+    // Try to match a reduction sequence (a series of shufflevector and vector
+    // add instructions followed by an extractelement).
+ unsigned ReduxOpCode;
+ Type *ReduxType;
+
+ if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
+ else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
+
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ EEI->getOperand(0)->getType(), Idx);
+ }
+ case Instruction::InsertElement: {
+    const InsertElementInst *IE = cast<InsertElementInst>(I);
+ ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+ unsigned Idx = -1;
+ if (CI)
+ Idx = CI->getZExtValue();
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ IE->getType(), Idx);
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
+ unsigned NumVecElems = VecTypOp0->getVectorNumElements();
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+
+ if (NumVecElems == Mask.size()) {
+ if (isReverseVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
+ 0, nullptr);
+ if (isAlternateVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
+ VecTypOp0, 0, nullptr);
+ }
+
+ return -1;
+ }
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
+ Args.push_back(II->getArgOperand(J));
+
+ return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+ Args);
+ }
+ return -1;
+ default:
+ // We don't have any information on this instruction.
+ return -1;
+ }
+}
+
+void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
+ if (!F)
+ return;
+
+ for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
+ for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
+ Instruction *Inst = &*it;
+ unsigned Cost = getInstructionCost(Inst);
+ if (Cost != (unsigned)-1)
+ OS << "Cost Model: Found an estimated cost of " << Cost;
+ else
+ OS << "Cost Model: Unknown cost";
+
+ OS << " for instruction: "<< *Inst << "\n";
+ }
+ }
+}
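+
+// Sample output (illustrative):
+//   Cost Model: Found an estimated cost of 1 for instruction: %a = add i32 %x, %y
+//   Cost Model: Unknown cost for instruction: %c = call i32 @f()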
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
new file mode 100644
index 0000000..baee8b3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -0,0 +1,141 @@
+//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements an analysis pass that tries to delinearize all GEP
+// instructions in all loops using the SCEV analysis functionality. This pass is
+// only used for testing purposes: if your pass needs delinearization, please
+// use the on-demand SCEVAddRecExpr::delinearize() function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Constants.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+
+namespace {
+
+class Delinearization : public FunctionPass {
+  Delinearization(const Delinearization &) = delete;
+protected:
+ Function *F;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ Delinearization() : FunctionPass(ID) {
+ initializeDelinearizationPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &O, const Module *M = nullptr) const override;
+};
+
+} // end anonymous namespace
+
+void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+}
+
+bool Delinearization::runOnFunction(Function &F) {
+ this->F = &F;
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ return false;
+}
+
+static Value *getPointerOperand(Instruction &Inst) {
+ if (LoadInst *Load = dyn_cast<LoadInst>(&Inst))
+ return Load->getPointerOperand();
+ else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst))
+ return Store->getPointerOperand();
+ else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
+ return Gep->getPointerOperand();
+ return nullptr;
+}
+
+void Delinearization::print(raw_ostream &O, const Module *) const {
+ O << "Delinearization on function " << F->getName() << ":\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &(*I);
+
+ // Only analyze loads and stores.
+ if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) &&
+ !isa<GetElementPtrInst>(Inst))
+ continue;
+
+ const BasicBlock *BB = Inst->getParent();
+ // Delinearize the memory access as analyzed in all the surrounding loops.
+ // Do not analyze memory accesses outside loops.
+ for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
+ const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+
+ const SCEVUnknown *BasePointer =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
+ // Do not delinearize if we cannot find the base pointer.
+ if (!BasePointer)
+ break;
+ AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
+
+ O << "\n";
+ O << "Inst:" << *Inst << "\n";
+ O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
+ O << "AccessFunction: " << *AccessFn << "\n";
+
+ SmallVector<const SCEV *, 3> Subscripts, Sizes;
+ SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
+ if (Subscripts.size() == 0 || Sizes.size() == 0 ||
+ Subscripts.size() != Sizes.size()) {
+ O << "failed to delinearize\n";
+ continue;
+ }
+
+ O << "Base offset: " << *BasePointer << "\n";
+ O << "ArrayDecl[UnknownSize]";
+ int Size = Subscripts.size();
+ for (int i = 0; i < Size - 1; i++)
+ O << "[" << *Sizes[i] << "]";
+ O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
+
+ O << "ArrayRef";
+ for (int i = 0; i < Size; i++)
+ O << "[" << *Subscripts[i] << "]";
+ O << "\n";
+ }
+ }
+}
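+
+// Sample output (illustrative) for an access A[i][j] inside a double loop,
+// with an unknown inner dimension %m and 8-byte elements:
+//   Base offset: %A
+//   ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
+//   ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>]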
+
+char Delinearization::ID = 0;
+static const char delinearization_name[] = "Delinearization";
+INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
+ true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
+
+FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
new file mode 100644
index 0000000..912c5ce
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -0,0 +1,392 @@
+//===---- DemandedBits.cpp - Determine demanded bits ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a demanded bits analysis. A demanded bit is one that
+// contributes to a result; bits that are not demanded can be either zero or
+// one without affecting control or data flow. For example in this sequence:
+//
+// %1 = add i32 %x, %y
+// %2 = trunc i32 %1 to i16
+//
+// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
+// trunc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "demanded-bits"
+
+char DemandedBits::ID = 0;
+INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis",
+ false, false)
+
+DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) {
+ initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
+}
+
+void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.setPreservesAll();
+}
+
+static bool isAlwaysLive(Instruction *I) {
+ return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ I->isEHPad() || I->mayHaveSideEffects();
+}
+
+void DemandedBits::determineLiveOperandBits(
+ const Instruction *UserI, const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2) {
+ unsigned BitWidth = AB.getBitWidth();
+
+ // We're called once per operand, but for some instructions, we need to
+ // compute known bits of both operands in order to determine the live bits of
+ // either (when both operands are instructions themselves). We don't,
+ // however, want to do this twice, so we cache the result in APInts that live
+ // in the caller. For the two-relevant-operands case, both operand values are
+ // provided here.
+ auto ComputeKnownBits =
+ [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ AC, UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ 0, AC, UserI, DT);
+ }
+ };
+
+ switch (UserI->getOpcode()) {
+ default: break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // The alive bits of the input are the swapped alive bits of
+ // the output.
+ AB = AOut.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the left of, and including, the leftmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getHighBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ }
+ break;
+ case Intrinsic::cttz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the right of, and including, the rightmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getLowBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ }
+ break;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // Find the highest live output bit. We don't need any more input
+ // bits than that (adds, and thus subtracts, ripple only to the
+ // left).
+ AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+ break;
+ case Instruction::Shl:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.lshr(ShiftAmt);
+
+ // If the shift is nuw/nsw, then the high bits are not dead
+ // (because we've promised that they *must* be zero).
+ const ShlOperator *S = cast<ShlOperator>(UserI);
+ if (S->hasNoSignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (S->hasNoUnsignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<LShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::AShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<AShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::And:
+ AB = AOut;
+
+ // For bits that are known zero, the corresponding bits in the
+ // other operand are dead (unless they're both zero, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownZero2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownZero & ~KnownZero2);
+ }
+ break;
+ case Instruction::Or:
+ AB = AOut;
+
+ // For bits that are known one, the corresponding bits in the
+ // other operand are dead (unless they're both one, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownOne2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownOne & ~KnownOne2);
+ }
+ break;
+ case Instruction::Xor:
+ case Instruction::PHI:
+ AB = AOut;
+ break;
+ case Instruction::Trunc:
+ AB = AOut.zext(BitWidth);
+ break;
+ case Instruction::ZExt:
+ AB = AOut.trunc(BitWidth);
+ break;
+ case Instruction::SExt:
+ AB = AOut.trunc(BitWidth);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+ AOut.getBitWidth() - BitWidth))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+ break;
+ case Instruction::Select:
+ if (OperandNo != 0)
+ AB = AOut;
+ break;
+ case Instruction::ICmp:
+ // Count the number of leading zeroes in each operand.
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes);
+ break;
+ }
+}
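+
+// Example (illustrative): in
+//   %1 = add i32 %x, %y
+//   %2 = trunc i32 %1 to i16
+// the Trunc case zero-extends the 16 demanded bits to AB = 0x0000FFFF for
+// %1, and the Add case then demands only the low 16 bits of %x and %y,
+// since carries ripple strictly leftward.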
+
+bool DemandedBits::runOnFunction(Function& Fn) {
+ F = &Fn;
+ Analyzed = false;
+ return false;
+}
+
+void DemandedBits::performAnalysis() {
+ if (Analyzed)
+ // Analysis already completed for this function.
+ return;
+ Analyzed = true;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ Visited.clear();
+ AliveBits.clear();
+
+ SmallVector<Instruction*, 128> Worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : instructions(*F)) {
+ if (!isAlwaysLive(&I))
+ continue;
+
+ DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
+ // For integer-valued instructions, set up an initial empty set of alive
+ // bits and add the instruction to the work list. For other instructions
+ // add their operands to the work list (for integer values operands, mark
+ // all bits as live).
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ if (!AliveBits.count(&I)) {
+ AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ Worklist.push_back(&I);
+ }
+
+ continue;
+ }
+
+ // Non-integer-typed instructions...
+ for (Use &OI : I.operands()) {
+ if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+ AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+ Worklist.push_back(J);
+ }
+ }
+ // To save memory, we don't add I to the Visited set here. Instead, we
+ // check isAlwaysLive on every instruction when searching for dead
+ // instructions later (we need to check isAlwaysLive for the
+ // integer-typed instructions anyway).
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!Worklist.empty()) {
+ Instruction *UserI = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
+ APInt AOut;
+ if (UserI->getType()->isIntegerTy()) {
+ AOut = AliveBits[UserI];
+ DEBUG(dbgs() << " Alive Out: " << AOut);
+ }
+ DEBUG(dbgs() << "\n");
+
+ if (!UserI->getType()->isIntegerTy())
+ Visited.insert(UserI);
+
+ APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+ // Compute the set of alive bits for each operand. These are anded into the
+ // existing set, if any, and if that changes the set of alive bits, the
+ // operand is added to the work-list.
+ for (Use &OI : UserI->operands()) {
+ if (Instruction *I = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+          if (UserI->getType()->isIntegerTy() && !AOut &&
+              !isAlwaysLive(UserI)) {
+            // If all bits of the output are dead, then all bits of the input
+            // are dead as well.
+            AB = APInt(BitWidth, 0);
+          } else {
+            // Bits of each operand that are used to compute alive bits of the
+            // output are alive, all others are dead.
+            determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+                                     KnownZero, KnownOne,
+                                     KnownZero2, KnownOne2);
+          }
+
+ // If we've added to the set of alive bits (or the operand has not
+ // been previously visited), then re-queue the operand to be visited
+ // again.
+ APInt ABPrev(BitWidth, 0);
+ auto ABI = AliveBits.find(I);
+ if (ABI != AliveBits.end())
+ ABPrev = ABI->second;
+
+ APInt ABNew = AB | ABPrev;
+ if (ABNew != ABPrev || ABI == AliveBits.end()) {
+ AliveBits[I] = std::move(ABNew);
+ Worklist.push_back(I);
+ }
+ } else if (!Visited.count(I)) {
+ Worklist.push_back(I);
+ }
+ }
+ }
+ }
+}
+
+APInt DemandedBits::getDemandedBits(Instruction *I) {
+ performAnalysis();
+
+ const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
+ if (AliveBits.count(I))
+ return AliveBits[I];
+ return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+}
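+
+// Usage sketch (illustrative; DB is the pass instance):
+//   APInt Demanded = DB.getDemandedBits(I);
+//   if (Demanded.getActiveBits() <= 8)
+//     ; // only the low byte of I can influence anything live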
+
+bool DemandedBits::isInstructionDead(Instruction *I) {
+ performAnalysis();
+
+ return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() &&
+ !isAlwaysLive(I);
+}
+
+void DemandedBits::print(raw_ostream &OS, const Module *M) const {
+ // This is gross. But the alternative is making all the state mutable
+ // just because of this one debugging method.
+ const_cast<DemandedBits*>(this)->performAnalysis();
+ for (auto &KV : AliveBits) {
+ OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
+ << *KV.first << "\n";
+ }
+}
+
+FunctionPass *llvm::createDemandedBitsPass() {
+ return new DemandedBits();
+}
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
new file mode 100644
index 0000000..4040ad3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -0,0 +1,4015 @@
+//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an (incomplete) implementation of the approach
+// described in
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// Currently, the implementation cannot propagate constraints between
+// coupled RDIV subscripts and lacks a multi-subscript MIV test.
+// Both of these are conservative weaknesses;
+// that is, not a source of correctness problems.
+//
+// The implementation depends on the GEP instruction to differentiate
+// subscripts. Since Clang linearizes some array subscripts, the dependence
+// analysis uses SCEV delinearization to recover the representation of multiple
+// subscripts, and thus avoids the more expensive and less precise MIV tests.
+// The delinearization is controlled by the flag -da-delinearize.
+//
+// We should pay some careful attention to the possibility of integer overflow
+// in the implementation of the various tests. This could happen with Add,
+// Subtract, or Multiply, with both APInt's and SCEV's.
+//
+// Some non-linear subscript pairs can be handled by the GCD test
+// (and perhaps other tests).
+// Should explore how often these things occur.
+//
+// Finally, it seems like certain test cases expose weaknesses in the SCEV
+// simplification, especially in the handling of sign and zero extensions.
+// It could be useful to spend time exploring these.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+//===----------------------------------------------------------------------===//
+// //
+// In memory of Ken Kennedy, 1945 - 2007 //
+// //
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "da"
+
+//===----------------------------------------------------------------------===//
+// statistics
+
+STATISTIC(TotalArrayPairs, "Array pairs tested");
+STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
+STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
+STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
+STATISTIC(ZIVapplications, "ZIV applications");
+STATISTIC(ZIVindependence, "ZIV independence");
+STATISTIC(StrongSIVapplications, "Strong SIV applications");
+STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
+STATISTIC(StrongSIVindependence, "Strong SIV independence");
+STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
+STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
+STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
+STATISTIC(ExactSIVapplications, "Exact SIV applications");
+STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
+STATISTIC(ExactSIVindependence, "Exact SIV independence");
+STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
+STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
+STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
+STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
+STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
+STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
+STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
+STATISTIC(DeltaApplications, "Delta applications");
+STATISTIC(DeltaSuccesses, "Delta successes");
+STATISTIC(DeltaIndependence, "Delta independence");
+STATISTIC(DeltaPropagations, "Delta propagations");
+STATISTIC(GCDapplications, "GCD applications");
+STATISTIC(GCDsuccesses, "GCD successes");
+STATISTIC(GCDindependence, "GCD independence");
+STATISTIC(BanerjeeApplications, "Banerjee applications");
+STATISTIC(BanerjeeIndependence, "Banerjee independence");
+STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+
+static cl::opt<bool>
+Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Try to delinearize array references."));
+
+//===----------------------------------------------------------------------===//
+// basics
+
+INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(DependenceAnalysis, "da",
+ "Dependence Analysis", true, true)
+
+char DependenceAnalysis::ID = 0;
+
+
+FunctionPass *llvm::createDependenceAnalysisPass() {
+ return new DependenceAnalysis();
+}
+
+
+bool DependenceAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ return false;
+}
+
+
+void DependenceAnalysis::releaseMemory() {
+}
+
+
+void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
+}
+
+
+// Used to test the dependence analyzer.
+// Looks through the function, noting loads and stores.
+// Calls depends() on every possible pair and prints out the result.
+// Ignores all other instructions.
+static
+void dumpExampleDependence(raw_ostream &OS, Function *F,
+ DependenceAnalysis *DA) {
+ for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
+ SrcI != SrcE; ++SrcI) {
+ if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
+ for (inst_iterator DstI = SrcI, DstE = inst_end(F);
+ DstI != DstE; ++DstI) {
+ if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
+ OS << "da analyze - ";
+ if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
+ D->dump(OS);
+ for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
+ if (D->isSplitable(Level)) {
+ OS << "da analyze - split level = " << Level;
+ OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
+ OS << "!\n";
+ }
+ }
+ }
+ else
+ OS << "none!\n";
+ }
+ }
+ }
+ }
+}
+
+
+void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+ dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
+}
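+
+// Note (illustrative usage, not from the original source): with the legacy
+// pass manager this printer is typically reached by requesting the "da"
+// pass in analysis mode, e.g. something like "opt -analyze -basicaa -da
+// input.ll"; the exact set of pipeline flags is an assumption and may vary
+// between releases.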
+
+//===----------------------------------------------------------------------===//
+// Dependence methods
+
+// Returns true if this is an input dependence.
+bool Dependence::isInput() const {
+ return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an output dependence.
+bool Dependence::isOutput() const {
+ return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if this is a flow (aka true) dependence.
+bool Dependence::isFlow() const {
+ return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
+}
+
+
+// Returns true if this is an anti dependence.
+bool Dependence::isAnti() const {
+ return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+// Leave this out of line, so it will serve as a virtual method anchor
+bool Dependence::isScalar(unsigned level) const {
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FullDependence methods
+
+FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
+ bool PossiblyLoopIndependent,
+ unsigned CommonLevels)
+ : Dependence(Source, Destination), Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
+ Consistent = true;
+ if (CommonLevels)
+ DV = make_unique<DVEntry[]>(CommonLevels);
+}
+
+// The rest are simple getters that hide the implementation.
+
+// getDirection - Returns the direction associated with a particular level.
+unsigned FullDependence::getDirection(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Direction;
+}
+
+
+// Returns the distance (or NULL) associated with a particular level.
+const SCEV *FullDependence::getDistance(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Distance;
+}
+
+
+// Returns true if a particular level is scalar; that is,
+// if no subscript in the source or destination mentions the induction
+// variable associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Scalar;
+}
+
+
+// Returns true if peeling the first iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelFirst;
+}
+
+
+// Returns true if peeling the last iteration from this loop
+// will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].PeelLast;
+}
+
+
+// Returns true if splitting this loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level) const {
+ assert(0 < Level && Level <= Levels && "Level out of range");
+ return DV[Level - 1].Splitable;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis::Constraint methods
+
+// If constraint is a point <X, Y>, returns X.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getX() const {
+ assert(Kind == Point && "Kind should be Point");
+ return A;
+}
+
+
+// If constraint is a point <X, Y>, returns Y.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getY() const {
+ assert(Kind == Point && "Kind should be Point");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns A.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getA() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return A;
+}
+
+
+// If constraint is a line AX + BY = C, returns B.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getB() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return B;
+}
+
+
+// If constraint is a line AX + BY = C, returns C.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getC() const {
+ assert((Kind == Line || Kind == Distance) &&
+ "Kind should be Line (or Distance)");
+ return C;
+}
+
+
+// If constraint is a distance, returns D.
+// Otherwise assert.
+const SCEV *DependenceAnalysis::Constraint::getD() const {
+ assert(Kind == Distance && "Kind should be Distance");
+ return SE->getNegativeSCEV(C);
+}
+
+
+// Returns the loop associated with this constraint.
+const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
+ assert((Kind == Distance || Kind == Line || Kind == Point) &&
+ "Kind should be Distance, Line, or Point");
+ return AssociatedLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
+ const SCEV *Y,
+ const Loop *CurLoop) {
+ Kind = Point;
+ A = X;
+ B = Y;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
+ const SCEV *BB,
+ const SCEV *CC,
+ const Loop *CurLoop) {
+ Kind = Line;
+ A = AA;
+ B = BB;
+ C = CC;
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
+ const Loop *CurLoop) {
+ Kind = Distance;
+ A = SE->getOne(D->getType());
+ B = SE->getNegativeSCEV(A);
+ C = SE->getNegativeSCEV(D);
+ AssociatedLoop = CurLoop;
+}
+
+
+void DependenceAnalysis::Constraint::setEmpty() {
+ Kind = Empty;
+}
+
+
+void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
+ SE = NewSE;
+ Kind = Any;
+}
+
+
+// For debugging purposes. Dumps the constraint out to OS.
+void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
+ if (isEmpty())
+ OS << " Empty\n";
+ else if (isAny())
+ OS << " Any\n";
+ else if (isPoint())
+ OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
+ else if (isDistance())
+ OS << " Distance is " << *getD() <<
+ " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
+ else if (isLine())
+ OS << " Line is " << *getA() << "*X + " <<
+ *getB() << "*Y = " << *getC() << "\n";
+ else
+ llvm_unreachable("unknown constraint type in Constraint::dump");
+}
+
+
+// Updates X with the intersection
+// of the Constraints X and Y. Returns true if X has changed.
+// Corresponds to Figure 4 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
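+//
+// Illustrative example (not part of the paper or the original comment):
+// intersecting the lines 1*X + 1*Y = 5 and 1*X - 1*Y = 1 gives
+// Xtop = 5*(-1) - 1*1 = -6, Xbot = 1*(-1) - 1*1 = -2, so X = 3, and
+// Ytop = 5*1 - 1*1 = 4, Ybot = 1*1 - 1*(-1) = 2, so Y = 2; the constraint
+// collapses to the Point <3, 2>. A non-zero remainder or a negative
+// quotient would instead make the constraint Empty.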
+bool DependenceAnalysis::intersectConstraints(Constraint *X,
+ const Constraint *Y) {
+ ++DeltaApplications;
+ DEBUG(dbgs() << "\tintersect constraints\n");
+ DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
+ DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
+ assert(!Y->isPoint() && "Y must not be a Point");
+ if (X->isAny()) {
+ if (Y->isAny())
+ return false;
+ *X = *Y;
+ return true;
+ }
+ if (X->isEmpty())
+ return false;
+ if (Y->isEmpty()) {
+ X->setEmpty();
+ return true;
+ }
+
+ if (X->isDistance() && Y->isDistance()) {
+ DEBUG(dbgs() << "\t intersect 2 distances\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ // Hmmm, interesting situation.
+ // I guess if either is constant, keep it and ignore the other.
+ if (isa<SCEVConstant>(Y->getD())) {
+ *X = *Y;
+ return true;
+ }
+ return false;
+ }
+
+ // At this point, the pseudo-code in Figure 4 of the paper
+ // checks if (X->isPoint() && Y->isPoint()).
+ // This case can't occur in our implementation,
+ // since a Point can only arise as the result of intersecting
+ // two Line constraints, and the right-hand value, Y, is never
+ // the result of an intersection.
+ assert(!(X->isPoint() && Y->isPoint()) &&
+ "We shouldn't ever see X->isPoint() && Y->isPoint()");
+
+ if (X->isLine() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect 2 lines\n");
+ const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
+ // slopes are equal, so lines are parallel
+ DEBUG(dbgs() << "\t\tsame slope\n");
+ Prod1 = SE->getMulExpr(X->getC(), Y->getB());
+ Prod2 = SE->getMulExpr(X->getB(), Y->getC());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
+ // slopes differ, so lines intersect
+ DEBUG(dbgs() << "\t\tdifferent slopes\n");
+ const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
+ const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
+ const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
+ const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
+ const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
+ const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
+ const SCEVConstant *C1A2_C2A1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
+ const SCEVConstant *C1B2_C2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
+ const SCEVConstant *A1B2_A2B1 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
+ const SCEVConstant *A2B1_A1B2 =
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
+ if (!C1B2_C2B1 || !C1A2_C2A1 ||
+ !A1B2_A2B1 || !A2B1_A1B2)
+ return false;
+ APInt Xtop = C1B2_C2B1->getAPInt();
+ APInt Xbot = A1B2_A2B1->getAPInt();
+ APInt Ytop = C1A2_C2A1->getAPInt();
+ APInt Ybot = A2B1_A1B2->getAPInt();
+ DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
+ DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
+ DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
+ DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
+ APInt Xq = Xtop; // these need to be initialized, even
+ APInt Xr = Xtop; // though they're just going to be overwritten
+ APInt::sdivrem(Xtop, Xbot, Xq, Xr);
+ APInt Yq = Ytop;
+ APInt Yr = Ytop;
+ APInt::sdivrem(Ytop, Ybot, Yq, Yr);
+ if (Xr != 0 || Yr != 0) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
+ if (Xq.slt(0) || Yq.slt(0)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+ APInt UpperBound = CUB->getAPInt();
+ DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
+ if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ }
+ X->setPoint(SE->getConstant(Xq),
+ SE->getConstant(Yq),
+ X->getAssociatedLoop());
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ // if (X->isLine() && Y->isPoint()) This case can't occur.
+ assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
+
+ if (X->isPoint() && Y->isLine()) {
+ DEBUG(dbgs() << "\t intersect Point and Line\n");
+ const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
+ const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
+ const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
+ return false;
+ if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
+ X->setEmpty();
+ ++DeltaSuccesses;
+ return true;
+ }
+ return false;
+ }
+
+ llvm_unreachable("shouldn't reach the end of Constraint intersection");
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DependenceAnalysis methods
+
+// For debugging purposes. Dumps a dependence to OS.
+void Dependence::dump(raw_ostream &OS) const {
+ bool Splitable = false;
+ if (isConfused())
+ OS << "confused";
+ else {
+ if (isConsistent())
+ OS << "consistent ";
+ if (isFlow())
+ OS << "flow";
+ else if (isOutput())
+ OS << "output";
+ else if (isAnti())
+ OS << "anti";
+ else if (isInput())
+ OS << "input";
+ unsigned Levels = getLevels();
+ OS << " [";
+ for (unsigned II = 1; II <= Levels; ++II) {
+ if (isSplitable(II))
+ Splitable = true;
+ if (isPeelFirst(II))
+ OS << 'p';
+ const SCEV *Distance = getDistance(II);
+ if (Distance)
+ OS << *Distance;
+ else if (isScalar(II))
+ OS << "S";
+ else {
+ unsigned Direction = getDirection(II);
+ if (Direction == DVEntry::ALL)
+ OS << "*";
+ else {
+ if (Direction & DVEntry::LT)
+ OS << "<";
+ if (Direction & DVEntry::EQ)
+ OS << "=";
+ if (Direction & DVEntry::GT)
+ OS << ">";
+ }
+ }
+ if (isPeelLast(II))
+ OS << 'p';
+ if (II < Levels)
+ OS << " ";
+ }
+ if (isLoopIndependent())
+ OS << "|<";
+ OS << "]";
+ if (Splitable)
+ OS << " splitable";
+ }
+ OS << "!\n";
+}
+
+static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const DataLayout &DL, const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A, DL);
+ const Value *BObj = GetUnderlyingObject(B, DL);
+ return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()),
+ BObj, DL.getTypeStoreSize(BObj->getType()));
+}
+
+
+// Returns true if the load or store can be analyzed. Atomic and volatile
+// operations have properties which this analysis does not understand.
+static
+bool isLoadOrStore(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isUnordered();
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isUnordered();
+ return false;
+}
+
+
+static
+Value *getPointerOperand(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ llvm_unreachable("Value is not load or store instruction");
+ return nullptr;
+}
+
+
+// Examines the loop nesting of the Src and Dst
+// instructions and establishes their shared loops. Sets the variables
+// CommonLevels, SrcLevels, and MaxLevels.
+// The source and destination instructions needn't be contained in the same
+// loop. The routine establishNestingLevels finds the level of the most deeply
+// nested loop that contains them both, CommonLevels. An instruction that's
+// not contained in a loop is at level = 0. MaxLevels is equal to the level
+// of the source plus the level of the destination, minus CommonLevels.
+// This lets us allocate vectors MaxLevels in length, with room for every
+// distinct loop referenced in both the source and destination subscripts.
+// The variable SrcLevels is the nesting depth of the source instruction.
+// It's used to help calculate distinct loops referenced by the destination.
+// Here's the map from loops to levels:
+// 0 - unused
+// 1 - outermost common loop
+// ... - other common loops
+// CommonLevels - innermost common loop
+// ... - loops containing Src but not Dst
+// SrcLevels - innermost loop containing Src but not Dst
+// ... - loops containing Dst but not Src
+// MaxLevels - innermost loop containing Dst but not Src
+// Consider the following code fragment:
+// for (a = ...) {
+// for (b = ...) {
+// for (c = ...) {
+// for (d = ...) {
+// A[] = ...;
+// }
+// }
+// for (e = ...) {
+// for (f = ...) {
+// for (g = ...) {
+// ... = A[];
+// }
+// }
+// }
+// }
+// }
+// If we're looking at the possibility of a dependence between the store
+// to A (the Src) and the load from A (the Dst), we'll note that they
+// have 2 loops in common, so CommonLevels will equal 2 and the direction
+// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
+// A map from loop names to loop numbers would look like
+// a - 1
+// b - 2 = CommonLevels
+// c - 3
+// d - 4 = SrcLevels
+// e - 5
+// f - 6
+// g - 7 = MaxLevels
+void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
+ const Instruction *Dst) {
+ const BasicBlock *SrcBlock = Src->getParent();
+ const BasicBlock *DstBlock = Dst->getParent();
+ unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
+ unsigned DstLevel = LI->getLoopDepth(DstBlock);
+ const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
+ const Loop *DstLoop = LI->getLoopFor(DstBlock);
+ SrcLevels = SrcLevel;
+ MaxLevels = SrcLevel + DstLevel;
+ while (SrcLevel > DstLevel) {
+ SrcLoop = SrcLoop->getParentLoop();
+ SrcLevel--;
+ }
+ while (DstLevel > SrcLevel) {
+ DstLoop = DstLoop->getParentLoop();
+ DstLevel--;
+ }
+ while (SrcLoop != DstLoop) {
+ SrcLoop = SrcLoop->getParentLoop();
+ DstLoop = DstLoop->getParentLoop();
+ SrcLevel--;
+ }
+ CommonLevels = SrcLevel;
+ MaxLevels -= CommonLevels;
+}
+
+
+// Given one of the loops containing the source, return
+// its level index in our numbering scheme.
+unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
+ return SrcLoop->getLoopDepth();
+}
+
+
+// Given one of the loops containing the destination,
+// return its level index in our numbering scheme.
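+// For example (illustrative, using the nest from establishNestingLevels
+// above where CommonLevels = 2 and SrcLevels = 4): the destination loop e
+// (depth 3) maps to 3 - 2 + 4 = 5, f (depth 4) maps to 6, and g (depth 5)
+// maps to 7, matching the numbering shown there.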
+unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
+ unsigned D = DstLoop->getLoopDepth();
+ if (D > CommonLevels)
+ return D - CommonLevels + SrcLevels;
+ else
+ return D;
+}
+
+
+// Returns true if Expression is loop invariant in LoopNest.
+bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
+ const Loop *LoopNest) const {
+ if (!LoopNest)
+ return true;
+ return SE->isLoopInvariant(Expression, LoopNest) &&
+ isLoopInvariant(Expression, LoopNest->getParentLoop());
+}
+
+
+
+// Finds the set of loops from the LoopNest that
+// have a level <= CommonLevels and are referred to by the SCEV Expression.
+void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) const {
+ while (LoopNest) {
+ unsigned Level = LoopNest->getLoopDepth();
+ if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
+ Loops.set(Level);
+ LoopNest = LoopNest->getParentLoop();
+ }
+}
+
+void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
+
+ unsigned widestWidthSeen = 0;
+  Type *widestType = nullptr;
+
+  // Go through each pair and find the widest bit width to which we need
+  // to extend all of them.
+ for (unsigned i = 0; i < Pairs.size(); i++) {
+ const SCEV *Src = Pairs[i]->Src;
+ const SCEV *Dst = Pairs[i]->Dst;
+ IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
+ IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
+ if (SrcTy == nullptr || DstTy == nullptr) {
+      assert(SrcTy == DstTy && "This function only unifies integer types and "
+             "expects Src and Dst to share the same type "
+             "otherwise.");
+ continue;
+ }
+ if (SrcTy->getBitWidth() > widestWidthSeen) {
+ widestWidthSeen = SrcTy->getBitWidth();
+ widestType = SrcTy;
+ }
+ if (DstTy->getBitWidth() > widestWidthSeen) {
+ widestWidthSeen = DstTy->getBitWidth();
+ widestType = DstTy;
+ }
+ }
+
+
+ assert(widestWidthSeen > 0);
+
+ // Now extend each pair to the widest seen.
+ for (unsigned i = 0; i < Pairs.size(); i++) {
+ const SCEV *Src = Pairs[i]->Src;
+ const SCEV *Dst = Pairs[i]->Dst;
+ IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
+ IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
+ if (SrcTy == nullptr || DstTy == nullptr) {
+      assert(SrcTy == DstTy && "This function only unifies integer types and "
+             "expects Src and Dst to share the same type "
+             "otherwise.");
+ continue;
+ }
+ if (SrcTy->getBitWidth() < widestWidthSeen)
+ // Sign-extend Src to widestType
+ Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType);
+ if (DstTy->getBitWidth() < widestWidthSeen) {
+ // Sign-extend Dst to widestType
+ Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType);
+ }
+ }
+}
+
+// removeMatchingExtensions - Examines a subscript pair.
+// If the source and destination are identically sign (or zero)
+// extended, it strips off the extension in an effort to simplify
+// the actual analysis.
+void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
+ const SCEV *Src = Pair->Src;
+ const SCEV *Dst = Pair->Dst;
+ if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
+ (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
+ const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
+ const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
+ const SCEV *SrcCastOp = SrcCast->getOperand();
+ const SCEV *DstCastOp = DstCast->getOperand();
+ if (SrcCastOp->getType() == DstCastOp->getType()) {
+ Pair->Src = SrcCastOp;
+ Pair->Dst = DstCastOp;
+ }
+ }
+}
+
+
+// Examine the SCEV and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ if (!AddRec)
+ return isLoopInvariant(Src, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(UB)) {
+ if (SE->getTypeSizeInBits(Start->getType()) <
+ SE->getTypeSizeInBits(UB->getType())) {
+ if (!AddRec->getNoWrapFlags())
+ return false;
+ }
+ }
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapSrcLoop(AddRec->getLoop()));
+ return checkSrcSubscript(Start, LoopNest, Loops);
+}
+
+
+
+// Examine the SCEV and return true iff it's linear.
+// Collect any loops mentioned in the set of "Loops".
+bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
+ const Loop *LoopNest,
+ SmallBitVector &Loops) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (!AddRec)
+ return isLoopInvariant(Dst, LoopNest);
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(UB)) {
+ if (SE->getTypeSizeInBits(Start->getType()) <
+ SE->getTypeSizeInBits(UB->getType())) {
+ if (!AddRec->getNoWrapFlags())
+ return false;
+ }
+ }
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+ Loops.set(mapDstLoop(AddRec->getLoop()));
+ return checkDstSubscript(Start, LoopNest, Loops);
+}
+
+
+// Examines the subscript pair (the Src and Dst SCEVs)
+// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
+// Collects the associated loops in a set.
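+// For example (illustrative): loop-invariant subscripts such as [5] and [n]
+// are ZIV; [i] and [i] (one common loop) are SIV; [i] and [j] with i and j
+// from different loops are RDIV; anything referencing more loops, such as
+// [i + j] versus [i], is MIV.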
+DependenceAnalysis::Subscript::ClassificationKind
+DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
+ const SCEV *Dst, const Loop *DstLoopNest,
+ SmallBitVector &Loops) {
+ SmallBitVector SrcLoops(MaxLevels + 1);
+ SmallBitVector DstLoops(MaxLevels + 1);
+ if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
+ return Subscript::NonLinear;
+ if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
+ return Subscript::NonLinear;
+ Loops = SrcLoops;
+ Loops |= DstLoops;
+ unsigned N = Loops.count();
+ if (N == 0)
+ return Subscript::ZIV;
+ if (N == 1)
+ return Subscript::SIV;
+ if (N == 2 && (SrcLoops.count() == 0 ||
+ DstLoops.count() == 0 ||
+ (SrcLoops.count() == 1 && DstLoops.count() == 1)))
+ return Subscript::RDIV;
+ return Subscript::MIV;
+}
+
+
+// A wrapper around SCEV::isKnownPredicate.
+// Looks for cases where we're interested in comparing for equality.
+// If both X and Y have been identically sign or zero extended,
+// it strips off the (confusing) extensions before invoking
+// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
+// will be similarly updated.
+//
+// If SCEV::isKnownPredicate can't prove the predicate,
+// we try simple subtraction, which seems to help in some cases
+// involving symbolics.
+bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *X,
+ const SCEV *Y) const {
+ if (Pred == CmpInst::ICMP_EQ ||
+ Pred == CmpInst::ICMP_NE) {
+ if ((isa<SCEVSignExtendExpr>(X) &&
+ isa<SCEVSignExtendExpr>(Y)) ||
+ (isa<SCEVZeroExtendExpr>(X) &&
+ isa<SCEVZeroExtendExpr>(Y))) {
+ const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
+ const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
+ const SCEV *Xop = CX->getOperand();
+ const SCEV *Yop = CY->getOperand();
+ if (Xop->getType() == Yop->getType()) {
+ X = Xop;
+ Y = Yop;
+ }
+ }
+ }
+ if (SE->isKnownPredicate(Pred, X, Y))
+ return true;
+ // If SE->isKnownPredicate can't prove the condition,
+ // we try the brute-force approach of subtracting
+ // and testing the difference.
+ // By testing with SE->isKnownPredicate first, we avoid
+ // the possibility of overflow when the arguments are constants.
+ const SCEV *Delta = SE->getMinusSCEV(X, Y);
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return Delta->isZero();
+ case CmpInst::ICMP_NE:
+ return SE->isKnownNonZero(Delta);
+ case CmpInst::ICMP_SGE:
+ return SE->isKnownNonNegative(Delta);
+ case CmpInst::ICMP_SLE:
+ return SE->isKnownNonPositive(Delta);
+ case CmpInst::ICMP_SGT:
+ return SE->isKnownPositive(Delta);
+ case CmpInst::ICMP_SLT:
+ return SE->isKnownNegative(Delta);
+ default:
+ llvm_unreachable("unexpected predicate in isKnownPredicate");
+ }
+}
+
+
+// All subscripts are the same type.
+// Loop bound may be smaller (e.g., a char).
+// Should zero extend loop bound, since it's always >= 0.
+// This routine collects upper bound and extends or truncates if needed.
+// Truncating is safe when subscripts are known not to wrap. Cases without
+// nowrap flags should have been rejected earlier.
+// Return null if no bound available.
+const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
+ Type *T) const {
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ const SCEV *UB = SE->getBackedgeTakenCount(L);
+ return SE->getTruncateOrZeroExtend(UB, T);
+ }
+ return nullptr;
+}
+
+
+// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
+// If the cast fails, returns NULL.
+const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
+ Type *T
+ ) const {
+ if (const SCEV *UB = collectUpperBound(L, T))
+ return dyn_cast<SCEVConstant>(UB);
+ return nullptr;
+}
+
+
+// testZIV -
+// When we have a pair of subscripts of the form [c1] and [c2],
+// where c1 and c2 are both loop invariant, we attack it using
+// the ZIV test. Basically, we test by comparing the two values,
+// but there are actually three possible results:
+// 1) the values are equal, so there's a dependence
+// 2) the values are different, so there's no dependence
+// 3) the values might be equal, so we have to assume a dependence.
+//
+// Return true if dependence disproved.
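+//
+// For example (illustrative): [5] vs. [7] and [n] vs. [n + 1] are both
+// disproved, since the difference is a known non-zero constant; [n] vs. [m]
+// with unrelated symbolic values can't be decided, so the test assumes a
+// dependence and marks the result inconsistent.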
+bool DependenceAnalysis::testZIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ ++ZIVapplications;
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
+ DEBUG(dbgs() << " provably dependent\n");
+ return false; // provably dependent
+ }
+ if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
+ DEBUG(dbgs() << " provably independent\n");
+ ++ZIVindependence;
+ return true; // provably independent
+ }
+ DEBUG(dbgs() << " possibly dependent\n");
+ Result.Consistent = false;
+ return false; // possibly dependent
+}
+
+
+// strongSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.1
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the Strong SIV test.
+//
+// Can prove independence. Failing that, can compute distance (and direction).
+// In the presence of symbolic terms, we can sometimes make progress.
+//
+// If there's a dependence,
+//
+// c1 + a*i = c2 + a*i'
+//
+// The dependence distance is
+//
+// d = i' - i = (c1 - c2)/a
+//
+// A dependence only exists if d is an integer and abs(d) <= U, where U is the
+// loop's upper bound. If a dependence exists, the dependence direction is
+// defined as
+//
+// { < if d > 0
+// direction = { = if d = 0
+// { > if d < 0
+//
+// Return true if dependence disproved.
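+//
+// For example (illustrative): for the pair [2 + i] and [0 + i], a = 1,
+// c1 = 2, c2 = 0, so d = (c1 - c2)/a = 2 and the direction is '<'; if the
+// loop's upper bound were known to be 1, abs(d) > U would instead disprove
+// the dependence.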
+bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tStrong SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff);
+ DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
+ DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst);
+ DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
+ ++StrongSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "level out of range");
+ Level--;
+
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta);
+ DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
+
+ // check that |Delta| < iteration count
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
+ DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
+ const SCEV *AbsDelta =
+ SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
+ const SCEV *AbsCoeff =
+ SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+ const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
+ // Distance greater than trip count - no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ }
+
+ // Can we compute distance?
+ if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
+ APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
+ APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
+ APInt Distance = ConstDelta; // these need to be initialized
+ APInt Remainder = ConstDelta;
+ APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ // Make sure Coeff divides Delta exactly
+ if (Remainder != 0) {
+ // Coeff doesn't divide Distance, no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
+ }
+ Result.DV[Level].Distance = SE->getConstant(Distance);
+ NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+ if (Distance.sgt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::LT;
+ else if (Distance.slt(0))
+ Result.DV[Level].Direction &= Dependence::DVEntry::GT;
+ else
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else if (Delta->isZero()) {
+ // since 0/X == 0
+ Result.DV[Level].Distance = Delta;
+ NewConstraint.setDistance(Delta, CurLoop);
+ Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
+ ++StrongSIVsuccesses;
+ }
+ else {
+ if (Coeff->isOne()) {
+ DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
+ Result.DV[Level].Distance = Delta; // since X/1 == X
+ NewConstraint.setDistance(Delta, CurLoop);
+ }
+ else {
+ Result.Consistent = false;
+ NewConstraint.setLine(Coeff,
+ SE->getNegativeSCEV(Coeff),
+ SE->getNegativeSCEV(Delta), CurLoop);
+ }
+
+ // maybe we can get a useful direction
+ bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
+ bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
+ bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
+ bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
+ bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
+ // The double negatives above are confusing.
+ // It helps to read !SE->isKnownNonZero(Delta)
+ // as "Delta might be Zero"
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if ((DeltaMaybePositive && CoeffMaybePositive) ||
+ (DeltaMaybeNegative && CoeffMaybeNegative))
+ NewDirection = Dependence::DVEntry::LT;
+ if (DeltaMaybeZero)
+ NewDirection |= Dependence::DVEntry::EQ;
+ if ((DeltaMaybeNegative && CoeffMaybePositive) ||
+ (DeltaMaybePositive && CoeffMaybeNegative))
+ NewDirection |= Dependence::DVEntry::GT;
+ if (NewDirection < Result.DV[Level].Direction)
+ ++StrongSIVsuccesses;
+ Result.DV[Level].Direction &= NewDirection;
+ }
+ return false;
+}
+
+
+// weakCrossingSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Crossing SIV test.
+//
+// Given c1 + a*i = c2 - a*i', we can look for the intersection of
+// the two lines, where i = i', yielding
+//
+// c1 + a*i = c2 - a*i
+// 2a*i = c2 - c1
+// i = (c2 - c1)/2a
+//
+// If i < 0, there is no dependence.
+// If i > upperbound, there is no dependence.
+// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
+// If i = upperbound, there's a dependence with distance = 0.
+// If i is integral, there's a dependence (all directions).
+// If the non-integer part = 1/2, there's a dependence (<> directions).
+// Otherwise, there's no dependence.
+//
+// Can prove independence. Failing that,
+// can sometimes refine the directions.
+// Can determine iteration for splitting.
+//
+// Return true if dependence disproved.
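+//
+// For example (illustrative): for the pair [0 + i] and [3 - i], a = 1,
+// c1 = 0, c2 = 3, so the lines cross at i = (c2 - c1)/2a = 1.5; the
+// fractional part is 1/2, so the '=' direction is removed while '<' and
+// '>' survive, and the split iteration computed below is 1.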
+bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
+ DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakCrossingSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+ if (Delta->isZero()) {
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Distance = Delta; // = 0
+ return false;
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
+ if (!ConstCoeff)
+ return false;
+
+ Result.DV[Level].Splitable = true;
+ if (SE->isKnownNegative(ConstCoeff)) {
+ ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
+ assert(ConstCoeff &&
+ "dynamic cast of negative of ConstCoeff should yield constant");
+ Delta = SE->getNegativeSCEV(Delta);
+ }
+ assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
+
+ // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
+ SplitIter = SE->getUDivExpr(
+ SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
+ SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
+ DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
+
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ if (!ConstDelta)
+ return false;
+
+ // We're certain that ConstCoeff > 0; therefore,
+ // if Delta < 0, then no dependence.
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
+ if (SE->isKnownNegative(Delta)) {
+ // No dependence, Delta < 0
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+
+ // We're certain that Delta > 0 and ConstCoeff > 0.
+ // Check Delta/(2*ConstCoeff) against upper loop bound
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
+ const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
+ ConstantTwo);
+ DEBUG(dbgs() << "\t ML = " << *ML << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
+ // Delta too big, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
+ // i = i' = UB
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
+ ++WeakCrossingSIVsuccesses;
+ if (!Result.DV[Level].Direction) {
+ ++WeakCrossingSIVindependence;
+ return true;
+ }
+ Result.DV[Level].Splitable = false;
+ Result.DV[Level].Distance = SE->getZero(Delta->getType());
+ return false;
+ }
+ }
+
+ // check that Coeff divides Delta
+ APInt APDelta = ConstDelta->getAPInt();
+ APInt APCoeff = ConstCoeff->getAPInt();
+  APInt Distance = APDelta; // these need to be initialized
+ APInt Remainder = APDelta;
+ APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Coeff doesn't divide Delta, no dependence
+ ++WeakCrossingSIVindependence;
+ ++WeakCrossingSIVsuccesses;
+ return true;
+ }
+ DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
+
+ // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
+ APInt Two = APInt(Distance.getBitWidth(), 2, true);
+ Remainder = Distance.srem(Two);
+ DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ // Equal direction isn't possible
+ Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ ++WeakCrossingSIVsuccesses;
+ }
+ return false;
+}
+
+
+// Kirch's algorithm, from
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// Program 2.1, page 29.
+// Computes the GCD of AM and BM.
+// Also finds a solution to the equation ax - by = gcd(a, b).
+// Returns true if dependence disproved; i.e., gcd does not divide Delta.
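+// For example (illustrative): with AM = 6 and BM = 4 the GCD is 2; for
+// Delta = 3 the GCD does not divide Delta and the routine returns true
+// (independence), while for Delta = 2 it divides evenly and the particular
+// solution in X and Y is scaled by Delta/GCD.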
+static
+bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
+ APInt &G, APInt &X, APInt &Y) {
+ APInt A0(Bits, 1, true), A1(Bits, 0, true);
+ APInt B0(Bits, 0, true), B1(Bits, 1, true);
+ APInt G0 = AM.abs();
+ APInt G1 = BM.abs();
+ APInt Q = G0; // these need to be initialized
+ APInt R = G0;
+ APInt::sdivrem(G0, G1, Q, R);
+ while (R != 0) {
+ APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
+ APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
+ G0 = G1; G1 = R;
+ APInt::sdivrem(G0, G1, Q, R);
+ }
+ G = G1;
+ DEBUG(dbgs() << "\t GCD = " << G << "\n");
+ X = AM.slt(0) ? -A1 : A1;
+ Y = BM.slt(0) ? B1 : -B1;
+
+ // make sure gcd divides Delta
+ R = Delta.srem(G);
+ if (R != 0)
+ return true; // gcd doesn't divide Delta, no dependence
+ Q = Delta.sdiv(G);
+ X *= Q;
+ Y *= Q;
+ return false;
+}
+
+
+static
+APInt floorOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q;
+ else
+ return Q - 1;
+}
+
+
+static
+APInt ceilingOfQuotient(APInt A, APInt B) {
+ APInt Q = A; // these need to be initialized
+ APInt R = A;
+ APInt::sdivrem(A, B, Q, R);
+ if (R == 0)
+ return Q;
+ if ((A.sgt(0) && B.sgt(0)) ||
+ (A.slt(0) && B.slt(0)))
+ return Q + 1;
+ else
+ return Q;
+}
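+
+// Illustrative values for the two quotient helpers above:
+// floorOfQuotient(7, 2) = 3 and ceilingOfQuotient(7, 2) = 4, while
+// floorOfQuotient(-7, 2) = -4 and ceilingOfQuotient(-7, 2) = -3;
+// i.e., they round toward -infinity and +infinity respectively, unlike
+// APInt::sdivrem, which truncates toward zero.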
+
+
+static
+APInt maxAPInt(APInt A, APInt B) {
+ return A.sgt(B) ? A : B;
+}
+
+
+static
+APInt minAPInt(APInt A, APInt B) {
+ return A.slt(B) ? A : B;
+}
+
+
+// exactSIVtest -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
+// where i is an induction variable, c1 and c2 are loop invariant, and a1
+// and a2 are constant, we can solve it exactly using an algorithm developed
+// by Banerjee and Wolfe. See Section 2.5.3 in
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+// MIT Press, 1989
+//
+// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
+// so use them if possible. They're also a bit better with symbolics and,
+// in the case of the strong SIV test, can compute Distances.
+//
+// Return true if dependence disproved.
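+//
+// For example (illustrative): for the pair [2*i] and [1 + 2*i], a1 = a2 = 2
+// and Delta = 1; gcd(2, 2) = 2 does not divide 1, so the dependence is
+// disproved without needing the loop bound at all.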
+bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ DEBUG(dbgs() << "\tExact SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactSIVapplications;
+ assert(0 < Level && Level <= CommonLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
+ Delta, CurLoop);
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getAPInt();
+ APInt BM = ConstDstCoeff->getAPInt();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction normalizes, LM = 0
+ APInt UM(Bits, 1, true);
+ bool UMvalid = false;
+ // UM is perhaps unavailable, let's check
+ if (const SCEVConstant *CUB =
+ collectConstantUpperBound(CurLoop, Delta->getType())) {
+ UM = CUB->getAPInt();
+ DEBUG(dbgs() << "\t UM = " << UM << "\n");
+ UMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (UMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (UMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU)) {
+ ++ExactSIVindependence;
+ ++ExactSIVsuccesses;
+ return true;
+ }
+
+ // explore directions
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+
+ // less than
+ APInt SaveTU(TU); // save these
+ APInt SaveTL(TL);
+ DEBUG(dbgs() << "\t exploring LT direction\n");
+ TMUL = AM - BM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::LT;
+ ++ExactSIVsuccesses;
+ }
+
+ // equal
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring EQ direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ TMUL = BM - AM;
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::EQ;
+ ++ExactSIVsuccesses;
+ }
+
+ // greater than
+ TU = SaveTU; // restore
+ TL = SaveTL;
+ DEBUG(dbgs() << "\t exploring GT direction\n");
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TL = " << TL << "\n");
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
+ DEBUG(dbgs() << "\t\t TU = " << TU << "\n");
+ }
+ if (TL.sle(TU)) {
+ NewDirection |= Dependence::DVEntry::GT;
+ ++ExactSIVsuccesses;
+ }
+
+ // finished
+ Result.DV[Level].Direction &= NewDirection;
+ if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
+ ++ExactSIVindependence;
+ return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
+}
+
+
+
+// Return true if the divisor evenly divides the dividend.
+static
+bool isRemainderZero(const SCEVConstant *Dividend,
+ const SCEVConstant *Divisor) {
+ APInt ConstDividend = Dividend->getAPInt();
+ APInt ConstDivisor = Divisor->getAPInt();
+ return ConstDividend.srem(ConstDivisor) == 0;
+}
+
+
+// weakZeroSrcSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 = c2 + a*i
+//
+// we get
+//
+// (c1 - c2)/a = i
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroDstSIVtest)
+//
+// Return true if dependence disproved.
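+//
+// For example (illustrative): for the pair [3] and [0 + i], a = 1, c1 = 3,
+// c2 = 0, so i = (c1 - c2)/a = 3; with an upper bound of 2 the dependence
+// is disproved, with an upper bound of exactly 3 only the last iteration
+// can carry it (peel the last iteration), and with c1 = c2 only the first
+// iteration can (peel the first).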
+bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the WeakSIV test, it's possible the loop isn't common to
+ // the Src and Dst loops. If it isn't, then there's no need to
+ // record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= MaxLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
+ CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, newDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// weakZeroDstSIVtest -
+// From the paper, Practical Dependence Testing, Section 4.2.2
+//
+// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
+// where i is an induction variable, c1 and c2 are loop invariant,
+// and a is a constant, we can solve it exactly using the
+// Weak-Zero SIV test.
+//
+// Given
+//
+// c1 + a*i = c2
+//
+// we get
+//
+// i = (c2 - c1)/a
+//
+// If i is not an integer, there's no dependence.
+// If i < 0 or > UB, there's no dependence.
+// If i = 0, the direction is <= and peeling the
+// 1st iteration will break the dependence.
+// If i = UB, the direction is >= and peeling the
+// last iteration will break the dependence.
+// Otherwise, the direction is *.
+//
+// Can prove independence. Failing that, we can sometimes refine
+// the directions. Can sometimes show that first or last
+// iteration carries all the dependences (so worth peeling).
+//
+// (see also weakZeroSrcSIVtest)
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *CurLoop,
+ unsigned Level,
+ FullDependence &Result,
+ Constraint &NewConstraint) const {
+ // For the WeakSIV test, it's possible the loop isn't common to the
+ // Src and Dst loops. If it isn't, then there's no need to record a direction.
+ DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++WeakZeroSIVapplications;
+ assert(0 < Level && Level <= SrcLevels && "Level out of range");
+ Level--;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
+ CurLoop);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::LE;
+ Result.DV[Level].PeelFirst = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false; // dependences caused by first iteration
+ }
+ const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ if (!ConstCoeff)
+ return false;
+ const SCEV *AbsCoeff =
+ SE->isKnownNegative(ConstCoeff) ?
+ SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
+ const SCEV *NewDelta =
+ SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
+
+ // check that Delta/SrcCoeff < iteration count
+ // really check NewDelta < count*AbsCoeff
+ if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+ DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
+ const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
+ if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
+ // dependences caused by last iteration
+ if (Level < CommonLevels) {
+ Result.DV[Level].Direction &= Dependence::DVEntry::GE;
+ Result.DV[Level].PeelLast = true;
+ ++WeakZeroSIVsuccesses;
+ }
+ return false;
+ }
+ }
+
+ // check that Delta/SrcCoeff >= 0
+ // really check that NewDelta >= 0
+ if (SE->isKnownNegative(NewDelta)) {
+ // No dependence, newDelta < 0
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+
+ // if SrcCoeff doesn't divide Delta, then no dependence
+ if (isa<SCEVConstant>(Delta) &&
+ !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
+ ++WeakZeroSIVindependence;
+ ++WeakZeroSIVsuccesses;
+ return true;
+ }
+ return false;
+}
+
+
+// exactRDIVtest - Tests the RDIV subscript pair for dependence.
+// Things of the form [c1 + a*i] and [c2 + b*j],
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a and b are constants.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
+bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
+ const SCEV *DstCoeff,
+ const SCEV *SrcConst,
+ const SCEV *DstConst,
+ const Loop *SrcLoop,
+ const Loop *DstLoop,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "\tExact RDIV test\n");
+ DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
+ DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
+ DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
+ DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
+ ++ExactRDIVapplications;
+ Result.Consistent = false;
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
+ const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
+ const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
+ const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
+ if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
+ return false;
+
+ // find gcd
+ APInt G, X, Y;
+ APInt AM = ConstSrcCoeff->getAPInt();
+ APInt BM = ConstDstCoeff->getAPInt();
+ unsigned Bits = AM.getBitWidth();
+ if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {
+ // gcd doesn't divide Delta, no dependence
+ ++ExactRDIVindependence;
+ return true;
+ }
+
+ DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
+
+ // since SCEV construction seems to normalize, LM = 0
+ APInt SrcUM(Bits, 1, true);
+ bool SrcUMvalid = false;
+ // SrcUM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(SrcLoop, Delta->getType())) {
+ SrcUM = UpperBound->getAPInt();
+ DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
+ SrcUMvalid = true;
+ }
+
+ APInt DstUM(Bits, 1, true);
+ bool DstUMvalid = false;
+  // DstUM is perhaps unavailable, let's check
+ if (const SCEVConstant *UpperBound =
+ collectConstantUpperBound(DstLoop, Delta->getType())) {
+ DstUM = UpperBound->getAPInt();
+ DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
+ DstUMvalid = true;
+ }
+
+ APInt TU(APInt::getSignedMaxValue(Bits));
+ APInt TL(APInt::getSignedMinValue(Bits));
+
+ // test(BM/G, LM-X) and test(-BM/G, X-UM)
+ APInt TMUL = BM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (SrcUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (SrcUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+
+ // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
+ TMUL = AM.sdiv(G);
+ if (TMUL.sgt(0)) {
+ TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ if (DstUMvalid) {
+ TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ }
+ }
+ else {
+ TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
+ DEBUG(dbgs() << "\t TU = " << TU << "\n");
+ if (DstUMvalid) {
+ TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
+ DEBUG(dbgs() << "\t TL = " << TL << "\n");
+ }
+ }
+ if (TL.sgt(TU))
+ ++ExactRDIVindependence;
+ return TL.sgt(TU);
+}
+
+
+// symbolicRDIVtest -
+// In Section 4.5 of the Practical Dependence Testing paper, the authors
+// introduce a special case of Banerjee's Inequalities (also called the
+// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
+// particularly cases with symbolics. Since it's only able to disprove
+// dependence (not compute distances or directions), we'll use it as a
+// fallback for the other tests.
+//
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables and c1 and c2 are loop invariants,
+// we can use the symbolic tests to disprove some dependences, serving as a
+// backup for the RDIV test. Note that i and j can be the same variable,
+// letting this test serve as a backup for the various SIV tests.
+//
+// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
+// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
+// loop bounds for the i and j loops, respectively. So, ...
+//
+// c1 + a1*i = c2 + a2*j
+// a1*i - a2*j = c2 - c1
+//
+// To test for a dependence, we compute c2 - c1 and make sure it's in the
+// range of the maximum and minimum possible values of a1*i - a2*j.
+// Considering the signs of a1 and a2, we have 4 possible cases:
+//
+// 1) If a1 >= 0 and a2 >= 0, then
+// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
+// -a2*N2 <= c2 - c1 <= a1*N1
+//
+// 2) If a1 >= 0 and a2 <= 0, then
+// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
+// 0 <= c2 - c1 <= a1*N1 - a2*N2
+//
+// 3) If a1 <= 0 and a2 >= 0, then
+// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
+// a1*N1 - a2*N2 <= c2 - c1 <= 0
+//
+// 4) If a1 <= 0 and a2 <= 0, then
+// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
+// a1*N1 <= c2 - c1 <= -a2*N2
+//
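+// For example, in case 1 with a1 = 1, a2 = 2, and N1 = N2 = 10, any
+// dependence requires -20 <= c2 - c1 <= 10; if we can prove c2 - c1 = 15,
+// the dependence is disproved. (Illustrative numbers, not from the paper.)
+//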
+// return true if dependence disproved
+bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
+ const SCEV *A2,
+ const SCEV *C1,
+ const SCEV *C2,
+ const Loop *Loop1,
+ const Loop *Loop2) const {
+ ++SymbolicRDIVapplications;
+ DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
+ DEBUG(dbgs() << "\t A1 = " << *A1);
+ DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
+ DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
+ DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
+ DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
+ const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
+ const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
+ DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
+ DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
+ const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
+ const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
+ DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
+ DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
+ if (SE->isKnownNonNegative(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // a1 >= 0 && a2 >= 0
+ if (N1) {
+ // make sure that c2 - c1 <= a1*N1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 >= 0 && a2 <= 0
+ if (N1 && N2) {
+ // make sure that c2 - c1 <= a1*N1 - a2*N2
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that 0 <= c2 - c1
+ if (SE->isKnownNegative(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ else if (SE->isKnownNonPositive(A1)) {
+ if (SE->isKnownNonNegative(A2)) {
+ // a1 <= 0 && a2 >= 0
+ if (N1 && N2) {
+ // make sure that a1*N1 - a2*N2 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
+ DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ // make sure that c2 - c1 <= 0
+ if (SE->isKnownPositive(C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ else if (SE->isKnownNonPositive(A2)) {
+ // a1 <= 0 && a2 <= 0
+ if (N1) {
+ // make sure that a1*N1 <= c2 - c1
+ const SCEV *A1N1 = SE->getMulExpr(A1, N1);
+ DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ if (N2) {
+ // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
+ const SCEV *A2N2 = SE->getMulExpr(A2, N2);
+ DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
+ if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
+ ++SymbolicRDIVindependence;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+
+// testSIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
+// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
+// a2 are constant, we attack it with an SIV test. While they can all be
+// solved with the Exact SIV test, it's worthwhile to use simpler tests when
+// they apply; they're cheaper and sometimes more precise.
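+//
+// For example, [2 + 3*i] and [5 + 3*i] have equal coefficients, so the
+// cheap Strong SIV test applies and yields the exact dependence distance
+// (5 - 2)/3 = 1.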
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testSIV(const SCEV *Src,
+ const SCEV *Dst,
+ unsigned &Level,
+ FullDependence &Result,
+ Constraint &NewConstraint,
+ const SCEV *&SplitIter) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ assert(CurLoop == DstAddRec->getLoop() &&
+ "both loops in SIV should be same");
+ Level = mapSrcLoop(CurLoop);
+ bool disproven;
+ if (SrcCoeff == DstCoeff)
+ disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
+ disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint, SplitIter);
+ else
+ disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint);
+ return disproven ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+ }
+ if (SrcAddRec) {
+ const SCEV *SrcConst = SrcAddRec->getStart();
+ const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ const SCEV *DstConst = Dst;
+ const Loop *CurLoop = SrcAddRec->getLoop();
+ Level = mapSrcLoop(CurLoop);
+ return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
+ Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ if (DstAddRec) {
+ const SCEV *DstConst = DstAddRec->getStart();
+ const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ const SCEV *SrcConst = Src;
+ const Loop *CurLoop = DstAddRec->getLoop();
+ Level = mapDstLoop(CurLoop);
+ return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
+ CurLoop, Level, Result, NewConstraint) ||
+ gcdMIVtest(Src, Dst, Result);
+ }
+ llvm_unreachable("SIV test expected at least one AddRec");
+ return false;
+}
+
+
+// testRDIV -
+// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
+// where i and j are induction variables, c1 and c2 are loop invariant,
+// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
+// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
+// It doesn't make sense to talk about distance or direction in this case,
+// so there's no point in making special versions of the Strong SIV test or
+// the Weak-crossing SIV test.
+//
+// With minor algebra, this test can also be used for things like
+// [c1 + a1*i + a2*j][c2].
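+// For example, [c1 + a1*i + a2*j] vs. [c2] can be rewritten as
+// [c1 + a1*i] vs. [c2 - a2*j], recovering the two-subscript form.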
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::testRDIV(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ // we have 3 possible situations here:
+ // 1) [a*i + b] and [c*j + d]
+ // 2) [a*i + c*j + b] and [d]
+ // 3) [b] and [a*i + c*j + d]
+ // We need to find what we've got and get organized
+
+ const SCEV *SrcConst, *DstConst;
+ const SCEV *SrcCoeff, *DstCoeff;
+ const Loop *SrcLoop, *DstLoop;
+
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
+ const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
+ if (SrcAddRec && DstAddRec) {
+ SrcConst = SrcAddRec->getStart();
+ SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
+ SrcLoop = SrcAddRec->getLoop();
+ DstConst = DstAddRec->getStart();
+ DstCoeff = DstAddRec->getStepRecurrence(*SE);
+ DstLoop = DstAddRec->getLoop();
+ }
+ else if (SrcAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
+ SrcConst = tmpAddRec->getStart();
+ SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
+ SrcLoop = tmpAddRec->getLoop();
+ DstConst = Dst;
+ DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
+ DstLoop = SrcAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else if (DstAddRec) {
+ if (const SCEVAddRecExpr *tmpAddRec =
+ dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
+ DstConst = tmpAddRec->getStart();
+ DstCoeff = tmpAddRec->getStepRecurrence(*SE);
+ DstLoop = tmpAddRec->getLoop();
+ SrcConst = Src;
+ SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
+ SrcLoop = DstAddRec->getLoop();
+ }
+ else
+ llvm_unreachable("RDIV reached by surprising SCEVs");
+ }
+ else
+ llvm_unreachable("RDIV expected at least one AddRec");
+ return exactRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop,
+ Result) ||
+ gcdMIVtest(Src, Dst, Result) ||
+ symbolicRDIVtest(SrcCoeff, DstCoeff,
+ SrcConst, DstConst,
+ SrcLoop, DstLoop);
+}
+
+
+// Tests the single-subscript MIV pair (Src and Dst) for dependence.
+// Return true if dependence disproved.
+// Can sometimes refine direction vectors.
+bool DependenceAnalysis::testMIV(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << " src = " << *Src << "\n");
+ DEBUG(dbgs() << " dst = " << *Dst << "\n");
+ Result.Consistent = false;
+ return gcdMIVtest(Src, Dst, Result) ||
+ banerjeeMIVtest(Src, Dst, Loops, Result);
+}
+
+
+// Given a product, e.g., 10*X*Y, returns the first constant operand,
+// in this case 10. If there is no constant part, returns NULL.
+static
+const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
+ for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
+ if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
+ return Constant;
+ }
+ return nullptr;
+}
+
+
+//===----------------------------------------------------------------------===//
+// gcdMIVtest -
+// Tests an MIV subscript pair for dependence.
+// Returns true if any possible dependence is disproved.
+// Marks the result as inconsistent.
+// Can sometimes disprove the equal direction for 1 or more loops,
+// as discussed in Michael Wolfe's book,
+// High Performance Compilers for Parallel Computing, page 235.
+//
+// We spend some effort (code!) to handle cases like
+// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
+// but M and N are just loop-invariant variables.
+// This should help us handle linearized subscripts;
+// also makes this test a useful backup to the various SIV tests.
+//
+// It occurs to me that the presence of loop-invariant variables
+// changes the nature of the test from "greatest common divisor"
+// to "a common divisor".
+bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting gcd\n");
+ ++GCDapplications;
+ unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
+ APInt RunningGCD = APInt::getNullValue(BitWidth);
+
+ // Examine Src coefficients.
+ // Compute running GCD and record source constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ const SCEV *Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getAPInt();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *SrcConst = Coefficients;
+
+ // Examine Dst coefficients.
+ // Compute running GCD and record destination constant.
+ // Because we're looking for the constant at the end of the chain,
+ // we can't quit the loop just because the GCD == 1.
+ Coefficients = Dst;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ if (!Constant)
+ return false;
+ APInt ConstCoeff = Constant->getAPInt();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ Coefficients = AddRec->getStart();
+ }
+ const SCEV *DstConst = Coefficients;
+
+ APInt ExtraGCD = APInt::getNullValue(BitWidth);
+ const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
+ DEBUG(dbgs() << " Delta = " << *Delta << "\n");
+ const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
+ if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
+ // If Delta is a sum of products, we may be able to make further progress.
+ for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
+ const SCEV *Operand = Sum->getOperand(Op);
+ if (isa<SCEVConstant>(Operand)) {
+ assert(!Constant && "Surprised to find multiple constants");
+ Constant = cast<SCEVConstant>(Operand);
+ }
+ else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
+ // Search for a constant operand to participate in the GCD;
+ // if none is found, return false.
+ const SCEVConstant *ConstOp = getConstantPart(Product);
+ if (!ConstOp)
+ return false;
+ APInt ConstOpValue = ConstOp->getAPInt();
+ ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
+ ConstOpValue.abs());
+ }
+ else
+ return false;
+ }
+ }
+ if (!Constant)
+ return false;
+ APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
+ DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
+ if (ConstDelta == 0)
+ return false;
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
+ DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
+ APInt Remainder = ConstDelta.srem(RunningGCD);
+ if (Remainder != 0) {
+ ++GCDindependence;
+ return true;
+ }
+
+ // Try to disprove equal directions.
+ // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
+ // the code above can't disprove the dependence because the GCD = 1.
+ // So we consider what happens if i = i' and what happens if j = j'.
+ // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
+ // which is infeasible, so we can disallow the = direction for the i level.
+ // Setting j = j' doesn't help matters, so we end up with a direction vector
+ // of [<>, *]
+ //
+ // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
+ // we need to remember that the constant part is 5 and the RunningGCD should
+ // be initialized to ExtraGCD = 30.
+ DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
+
+ bool Improved = false;
+ Coefficients = Src;
+ while (const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(Coefficients)) {
+ Coefficients = AddRec->getStart();
+ const Loop *CurLoop = AddRec->getLoop();
+ RunningGCD = ExtraGCD;
+ const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
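+ // Start DstCoeff at zero (SrcCoeff - SrcCoeff, which has the right type);
+ // the inner loop over Dst below replaces it when CurLoop is found.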
+ const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
+ const SCEV *Inner = Src;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ ; // SrcCoeff == Coeff
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getAPInt();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Inner = Dst;
+ while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
+ AddRec = cast<SCEVAddRecExpr>(Inner);
+ const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
+ if (CurLoop == AddRec->getLoop())
+ DstCoeff = Coeff;
+ else {
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else
+ Constant = cast<SCEVConstant>(Coeff);
+ APInt ConstCoeff = Constant->getAPInt();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ }
+ Inner = AddRec->getStart();
+ }
+ Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
+ if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
+ // If the coefficient is the product of a constant and other stuff,
+ // we can use the constant in the GCD computation.
+ Constant = getConstantPart(Product);
+ else if (isa<SCEVConstant>(Delta))
+ Constant = cast<SCEVConstant>(Delta);
+ else {
+ // The difference of the two coefficients might not be a product
+ // or constant, in which case we give up on this direction.
+ continue;
+ }
+ APInt ConstCoeff = Constant->getAPInt();
+ RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
+ DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
+ if (RunningGCD != 0) {
+ Remainder = ConstDelta.srem(RunningGCD);
+ DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
+ if (Remainder != 0) {
+ unsigned Level = mapSrcLoop(CurLoop);
+ Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
+ Improved = true;
+ }
+ }
+ }
+ if (Improved)
+ ++GCDsuccesses;
+ DEBUG(dbgs() << "all done\n");
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// banerjeeMIVtest -
+// Use Banerjee's Inequalities to test an MIV subscript pair.
+// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
+// Generally follows the discussion in Section 2.5.2 of
+//
+// Optimizing Supercompilers for Supercomputers
+// Michael Wolfe
+//
+// The inequalities given on page 25 are simplified in that loops are
+// normalized so that the lower bound is always 0 and the stride is always 1.
+// For example, Wolfe gives
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// where A_k is the coefficient of the kth index in the source subscript,
+// B_k is the coefficient of the kth index in the destination subscript,
+// U_k is the upper bound of the kth index, L_k is the lower bound of the kth
+// index, and N_k is the stride of the kth index. Since all loops are normalized
+// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
+// equation to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
+// = (A^-_k - B_k)^- (U_k - 1) - B_k
+//
+// Similar simplifications are possible for the other equations.
+//
+// When we can't determine the number of iterations for a loop,
+// we use NULL as an indicator for the worst case, infinity.
+// When computing the upper bound, NULL denotes +inf;
+// for the lower bound, NULL denotes -inf.
+//
+// Return true if dependence disproved.
+bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
+ const SCEV *Dst,
+ const SmallBitVector &Loops,
+ FullDependence &Result) const {
+ DEBUG(dbgs() << "starting Banerjee\n");
+ ++BanerjeeApplications;
+ DEBUG(dbgs() << " Src = " << *Src << '\n');
+ const SCEV *A0;
+ CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
+ DEBUG(dbgs() << " Dst = " << *Dst << '\n');
+ const SCEV *B0;
+ CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
+ BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
+ const SCEV *Delta = SE->getMinusSCEV(B0, A0);
+ DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
+
+ // Compute bounds for all the * directions.
+ DEBUG(dbgs() << "\tBounds[*]\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
+ Bound[K].Direction = Dependence::DVEntry::ALL;
+ Bound[K].DirSet = Dependence::DVEntry::NONE;
+ findBoundsALL(A, B, Bound, K);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\t " << K << '\t');
+ if (Bound[K].Lower[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[K].Upper[Dependence::DVEntry::ALL])
+ DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ // Test the *, *, *, ... case.
+ bool Disproved = false;
+ if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
+ // Explore the direction vector hierarchy.
+ unsigned DepthExpanded = 0;
+ unsigned NewDeps = exploreDirections(1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+ if (NewDeps > 0) {
+ bool Improved = false;
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ unsigned Old = Result.DV[K - 1].Direction;
+ Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
+ Improved |= Old != Result.DV[K - 1].Direction;
+ if (!Result.DV[K - 1].Direction) {
+ Improved = false;
+ Disproved = true;
+ break;
+ }
+ }
+ }
+ if (Improved)
+ ++BanerjeeSuccesses;
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ }
+ else {
+ ++BanerjeeIndependence;
+ Disproved = true;
+ }
+ delete [] Bound;
+ delete [] A;
+ delete [] B;
+ return Disproved;
+}
+
+
+// Hierarchically expands the direction vector
+// search space, combining the directions of discovered dependences
+// in the DirSet field of Bound. Returns the number of distinct
+// dependences discovered. If the dependence is disproved,
+// it will return 0.
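+//
+// For example, with two common loop levels, up to 3^2 = 9 direction
+// vectors (<,<), (<,=), ..., (>,>) might be explored, though testBounds
+// prunes branches whose bounds already exclude Delta.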
+unsigned DependenceAnalysis::exploreDirections(unsigned Level,
+ CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ const SmallBitVector &Loops,
+ unsigned &DepthExpanded,
+ const SCEV *Delta) const {
+ if (Level > CommonLevels) {
+ // record result
+ DEBUG(dbgs() << "\t[");
+ for (unsigned K = 1; K <= CommonLevels; ++K) {
+ if (Loops[K]) {
+ Bound[K].DirSet |= Bound[K].Direction;
+#ifndef NDEBUG
+ switch (Bound[K].Direction) {
+ case Dependence::DVEntry::LT:
+ DEBUG(dbgs() << " <");
+ break;
+ case Dependence::DVEntry::EQ:
+ DEBUG(dbgs() << " =");
+ break;
+ case Dependence::DVEntry::GT:
+ DEBUG(dbgs() << " >");
+ break;
+ case Dependence::DVEntry::ALL:
+ DEBUG(dbgs() << " *");
+ break;
+ default:
+ llvm_unreachable("unexpected Bound[K].Direction");
+ }
+#endif
+ }
+ }
+ DEBUG(dbgs() << " ]\n");
+ return 1;
+ }
+ if (Loops[Level]) {
+ if (Level > DepthExpanded) {
+ DepthExpanded = Level;
+ // compute bounds for <, =, > at current level
+ findBoundsLT(A, B, Bound, Level);
+ findBoundsGT(A, B, Bound, Level);
+ findBoundsEQ(A, B, Bound, Level);
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
+ DEBUG(dbgs() << "\t <\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::LT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t =\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::EQ])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+ DEBUG(dbgs() << "\t >\t");
+ if (Bound[Level].Lower[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
+ else
+ DEBUG(dbgs() << "-inf\t");
+ if (Bound[Level].Upper[Dependence::DVEntry::GT])
+ DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
+ else
+ DEBUG(dbgs() << "+inf\n");
+#endif
+ }
+
+ unsigned NewDeps = 0;
+
+ // test bounds for <, *, *, ...
+ if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // Test bounds for =, *, *, ...
+ if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ // test bounds for >, *, *, ...
+ if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
+ NewDeps += exploreDirections(Level + 1, A, B, Bound,
+ Loops, DepthExpanded, Delta);
+
+ Bound[Level].Direction = Dependence::DVEntry::ALL;
+ return NewDeps;
+ }
+ else
+ return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
+}
+
+
+// Returns true iff the current bounds are plausible.
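+// That is, returns false only when the lower bound is known to exceed
+// Delta or Delta is known to exceed the upper bound; a NULL bound is
+// treated as -inf or +inf, respectively.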
+bool DependenceAnalysis::testBounds(unsigned char DirKind,
+ unsigned Level,
+ BoundInfo *Bound,
+ const SCEV *Delta) const {
+ Bound[Level].Direction = DirKind;
+ if (const SCEV *LowerBound = getLowerBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
+ return false;
+ if (const SCEV *UpperBound = getUpperBound(Bound))
+ if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
+ return false;
+ return true;
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the * direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
+// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^*_k = (A^-_k - B^+_k)U_k
+// UB^*_k = (A^+_k - B^-_k)U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
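+//
+// For example, with A_k = 2, B_k = 3, and U_k = 10:
+// LB^*_k = (0 - 3)*10 = -30 and UB^*_k = (2 - 0)*10 = 20.
+// (Illustrative numbers.)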
+void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
+ Bound[K].Iterations);
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
+ Bound[K].Iterations);
+ }
+ else {
+ // If the difference is 0, we won't need to know the number of iterations.
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
+ Bound[K].Lower[Dependence::DVEntry::ALL] =
+ SE->getZero(A[K].Coeff->getType());
+ if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
+ Bound[K].Upper[Dependence::DVEntry::ALL] =
+ SE->getZero(A[K].Coeff->getType());
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the = direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
+// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^=_k = (A_k - B_k)^- U_k
+// UB^=_k = (A_k - B_k)^+ U_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+// Note that the lower bound is always <= 0
+// and the upper bound is always >= 0.
+void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ Bound[K].Lower[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(NegativePart, Bound[K].Iterations);
+ const SCEV *PositivePart = getPositivePart(Delta);
+ Bound[K].Upper[Dependence::DVEntry::EQ] =
+ SE->getMulExpr(PositivePart, Bound[K].Iterations);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
+ const SCEV *NegativePart = getNegativePart(Delta);
+ if (NegativePart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
+ const SCEV *PositivePart = getPositivePart(Delta);
+ if (PositivePart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the < direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
+// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 = SE->getMinusSCEV(
+ Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ Bound[K].Lower[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ Bound[K].Upper[Dependence::DVEntry::LT] =
+ SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
+ }
+}
+
+
+// Computes the upper and lower bounds for level K
+// using the > direction. Records them in Bound.
+// Wolfe gives the equations
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
+//
+// Since we normalize loops, we can simplify these equations to
+//
+// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
+// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
+//
+// We must be careful to handle the case where the upper bound is unknown.
+void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
+ CoefficientInfo *B,
+ BoundInfo *Bound,
+ unsigned K) const {
+ Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
+ if (Bound[K].Iterations) {
+ const SCEV *Iter_1 = SE->getMinusSCEV(
+ Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
+ const SCEV *NegPart =
+ getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ Bound[K].Lower[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
+ const SCEV *PosPart =
+ getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ Bound[K].Upper[Dependence::DVEntry::GT] =
+ SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
+ }
+ else {
+ // If the positive/negative part of the difference is 0,
+ // we won't need to know the number of iterations.
+ const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
+ if (NegPart->isZero())
+ Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
+ const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
+ if (PosPart->isZero())
+ Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
+ }
+}
+
+
+// X^+ = max(X, 0)
+const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
+ return SE->getSMaxExpr(X, SE->getZero(X->getType()));
+}
+
+
+// X^- = min(X, 0)
+const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
+ return SE->getSMinExpr(X, SE->getZero(X->getType()));
+}
+
+
+// Walks through the subscript,
+// collecting each coefficient, the associated loop bounds,
+// and recording its positive and negative parts for later use.
+DependenceAnalysis::CoefficientInfo *
+DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
+ bool SrcFlag,
+ const SCEV *&Constant) const {
+ const SCEV *Zero = SE->getZero(Subscript->getType());
+ CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ CI[K].Coeff = Zero;
+ CI[K].PosPart = Zero;
+ CI[K].NegPart = Zero;
+ CI[K].Iterations = nullptr;
+ }
+ while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
+ const Loop *L = AddRec->getLoop();
+ unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
+ CI[K].Coeff = AddRec->getStepRecurrence(*SE);
+ CI[K].PosPart = getPositivePart(CI[K].Coeff);
+ CI[K].NegPart = getNegativePart(CI[K].Coeff);
+ CI[K].Iterations = collectUpperBound(L, Subscript->getType());
+ Subscript = AddRec->getStart();
+ }
+ Constant = Subscript;
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\tCoefficient Info\n");
+ for (unsigned K = 1; K <= MaxLevels; ++K) {
+ DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
+ DEBUG(dbgs() << "\tPos Part = ");
+ DEBUG(dbgs() << *CI[K].PosPart);
+ DEBUG(dbgs() << "\tNeg Part = ");
+ DEBUG(dbgs() << *CI[K].NegPart);
+ DEBUG(dbgs() << "\tUpper Bound = ");
+ if (CI[K].Iterations)
+ DEBUG(dbgs() << *CI[K].Iterations);
+ else
+ DEBUG(dbgs() << "+inf");
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
+#endif
+ return CI;
+}
+
+
+// Looks through all the bounds info and
+// computes the lower bound given the current direction settings
+// at each level. If the lower bound for any level is -inf,
+// the result is -inf.
+const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Lower[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
+ else
+ Sum = nullptr;
+ }
+ return Sum;
+}
+
+
+// Looks through all the bounds info and
+// computes the upper bound given the current direction settings
+// at each level. If the upper bound at any level is +inf,
+// the result is +inf.
+const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
+ const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
+ for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
+ if (Bound[K].Upper[Bound[K].Direction])
+ Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
+ else
+ Sum = nullptr;
+ }
+ return Sum;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constraint manipulation for Delta test.
+
+// Given a linear SCEV,
+// return the coefficient (the step)
+// corresponding to the specified loop.
+// If there isn't one, return 0.
+// For example, given a*i + b*j + c*k, finding the coefficient
+// corresponding to the j loop would yield b.
+const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return SE->getZero(Expr->getType());
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStepRecurrence(*SE);
+ return findCoefficient(AddRec->getStart(), TargetLoop);
+}
+
+
+// Given a linear SCEV,
+// return the SCEV given by zeroing out the coefficient
+// corresponding to the specified loop.
+// For example, given a*i + b*j + c*k, zeroing the coefficient
+// corresponding to the j loop would yield a*i + c*k.
+const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec)
+ return Expr; // ignore
+ if (AddRec->getLoop() == TargetLoop)
+ return AddRec->getStart();
+ return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
+ AddRec->getStepRecurrence(*SE),
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Given a linear SCEV Expr,
+// return the SCEV given by adding some Value to the
+// coefficient corresponding to the specified TargetLoop.
+// For example, given a*i + b*j + c*k, adding 1 to the coefficient
+// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
+const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
+ const Loop *TargetLoop,
+ const SCEV *Value) const {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AddRec) // create a new addRec
+ return SE->getAddRecExpr(Expr,
+ Value,
+ TargetLoop,
+ SCEV::FlagAnyWrap); // Worst case, with no info.
+ if (AddRec->getLoop() == TargetLoop) {
+ const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
+ if (Sum->isZero())
+ return AddRec->getStart();
+ return SE->getAddRecExpr(AddRec->getStart(),
+ Sum,
+ AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+ }
+ if (SE->isLoopInvariant(AddRec, TargetLoop))
+ return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
+ return SE->getAddRecExpr(
+ addToCoefficient(AddRec->getStart(), TargetLoop, Value),
+ AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
+}
+
+
+// Review the constraints, looking for opportunities
+// to simplify a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
+// Corresponds to Figure 5 from the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+bool DependenceAnalysis::propagate(const SCEV *&Src,
+ const SCEV *&Dst,
+ SmallBitVector &Loops,
+ SmallVectorImpl<Constraint> &Constraints,
+ bool &Consistent) {
+ bool Result = false;
+ for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+ DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
+ DEBUG(Constraints[LI].dump(dbgs()));
+ if (Constraints[LI].isDistance())
+ Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isLine())
+ Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
+ else if (Constraints[LI].isPoint())
+ Result |= propagatePoint(Src, Dst, Constraints[LI]);
+ }
+ return Result;
+}
+
+
+// Attempt to propagate a distance
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
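+//
+// For example, with Src = 2*i + 3*j, Dst = 2*i' + j', and the distance
+// constraint i' = i + 1, the i terms fold away, leaving Src = 3*j - 2
+// and Dst = j'. (Illustrative numbers.)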
+bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ if (A_K->isZero())
+ return false;
+ const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
+ Src = SE->getMinusSCEV(Src, DA_K);
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ return true;
+}
+
+
+// Attempt to propagate a line
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+// If the simplification isn't exact (that is, if it is conservative
+// in terms of dependence), set consistent to false.
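+//
+// For example, the line constraint 0*x + 2*y = 6 pins the destination
+// index y to 3, so 3 times y's coefficient is subtracted from Src and
+// the coefficient is zeroed in Dst. (Illustrative numbers.)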
+bool DependenceAnalysis::propagateLine(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint,
+ bool &Consistent) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A = CurConstraint.getA();
+ const SCEV *B = CurConstraint.getB();
+ const SCEV *C = CurConstraint.getC();
+ DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
+ DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
+ if (A->isZero()) {
+ const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Bconst || !Cconst) return false;
+ APInt Beta = Bconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
+ APInt CdivB = Charlie.sdiv(Beta);
+ assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
+ Dst = zeroCoefficient(Dst, CurLoop);
+ if (!findCoefficient(Src, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (B->isZero()) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
+ const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
+ const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
+ if (!Aconst || !Cconst) return false;
+ APInt Alpha = Aconst->getAPInt();
+ APInt Charlie = Cconst->getAPInt();
+ APInt CdivA = Charlie.sdiv(Alpha);
+ assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, A_K);
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ else {
+ // paper is incorrect here, or perhaps just misleading
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ Src = SE->getMulExpr(Src, A);
+ Dst = SE->getMulExpr(Dst, A);
+ Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
+ Src = zeroCoefficient(Src, CurLoop);
+ Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
+ if (!findCoefficient(Dst, CurLoop)->isZero())
+ Consistent = false;
+ }
+ DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
+ return true;
+}
+
+
+// Attempt to propagate a point
+// constraint into a subscript pair (Src and Dst).
+// Return true if some simplification occurs.
+bool DependenceAnalysis::propagatePoint(const SCEV *&Src,
+ const SCEV *&Dst,
+ Constraint &CurConstraint) {
+ const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+ const SCEV *A_K = findCoefficient(Src, CurLoop);
+ const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+ const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
+ const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
+ DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
+ Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
+ Src = zeroCoefficient(Src, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
+ DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
+ Dst = zeroCoefficient(Dst, CurLoop);
+ DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
+ return true;
+}
+
+
+// Update direction vector entry based on the current constraint.
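+// For example, a distance known to be +2 is nonzero and positive, so the
+// direction at this level collapses to LT; a distance that might be zero
+// keeps EQ as a possibility.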
+void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
+ const Constraint &CurConstraint
+ ) const {
+ DEBUG(dbgs() << "\tUpdate direction, constraint =");
+ DEBUG(CurConstraint.dump(dbgs()));
+ if (CurConstraint.isAny())
+ ; // use defaults
+ else if (CurConstraint.isDistance()) {
+ // this one is consistent, the others aren't
+ Level.Scalar = false;
+ Level.Distance = CurConstraint.getD();
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
+ NewDirection = Dependence::DVEntry::EQ;
+ if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else if (CurConstraint.isLine()) {
+ Level.Scalar = false;
+ Level.Distance = nullptr;
+ // direction should be accurate
+ }
+ else if (CurConstraint.isPoint()) {
+ Level.Scalar = false;
+ Level.Distance = nullptr;
+ unsigned NewDirection = Dependence::DVEntry::NONE;
+ if (!isKnownPredicate(CmpInst::ICMP_NE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if X may be = Y
+ NewDirection |= Dependence::DVEntry::EQ;
+ if (!isKnownPredicate(CmpInst::ICMP_SLE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be > X
+ NewDirection |= Dependence::DVEntry::LT;
+ if (!isKnownPredicate(CmpInst::ICMP_SGE,
+ CurConstraint.getY(),
+ CurConstraint.getX()))
+ // if Y may be < X
+ NewDirection |= Dependence::DVEntry::GT;
+ Level.Direction &= NewDirection;
+ }
+ else
+ llvm_unreachable("constraint has unexpected kind");
+}
+
+/// Check if we can delinearize the subscripts. If the SCEVs representing the
+/// source and destination array references are recurrences on a nested loop,
+/// this function flattens the nested recurrences into separate recurrences
+/// for each loop level.
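+///
+/// For example, an access to A[i][j] in a doubly-nested loop appears as a
+/// single recurrence involving both i and j; delinearization recovers the
+/// separate [i] and [j] subscripts so the cheaper SIV tests apply per
+/// dimension.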
+bool DependenceAnalysis::tryDelinearize(Instruction *Src,
+ Instruction *Dst,
+ SmallVectorImpl<Subscript> &Pair)
+{
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+
+ // The code below mimics the code in Delinearization.cpp.
+ const SCEV *SrcAccessFn =
+ SE->getSCEVAtScope(SrcPtr, SrcLoop);
+ const SCEV *DstAccessFn =
+ SE->getSCEVAtScope(DstPtr, DstLoop);
+
+ const SCEVUnknown *SrcBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+ const SCEVUnknown *DstBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+
+ if (!SrcBase || !DstBase || SrcBase != DstBase)
+ return false;
+
+ const SCEV *ElementSize = SE->getElementSize(Src);
+ if (ElementSize != SE->getElementSize(Dst))
+ return false;
+
+ const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
+ const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
+
+ const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
+ const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
+ if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
+ return false;
+
+ // First step: collect parametric terms in both array references.
+ SmallVector<const SCEV *, 4> Terms;
+ SE->collectParametricTerms(SrcAR, Terms);
+ SE->collectParametricTerms(DstAR, Terms);
+
+ // Second step: find subscript sizes.
+ SmallVector<const SCEV *, 4> Sizes;
+ SE->findArrayDimensions(Terms, Sizes, ElementSize);
+
+ // Third step: compute the access functions for each subscript.
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+ SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
+ SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
+
+ // Fail when there is only one subscript: that's a linearized access function.
+ if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
+ SrcSubscripts.size() != DstSubscripts.size())
+ return false;
+
+ int size = SrcSubscripts.size();
+
+ DEBUG({
+ dbgs() << "\nSrcSubscripts: ";
+ for (int i = 0; i < size; i++)
+ dbgs() << *SrcSubscripts[i];
+ dbgs() << "\nDstSubscripts: ";
+ for (int i = 0; i < size; i++)
+ dbgs() << *DstSubscripts[i];
+ });
+
+ // The delinearization transforms a single-subscript MIV dependence test into
+ // a multi-subscript SIV dependence test that is easier to compute. So we
+ // resize Pair to contain as many pairs of subscripts as the delinearization
+ // has found, and then initialize the pairs following the delinearization.
+ Pair.resize(size);
+ for (int i = 0; i < size; ++i) {
+ Pair[i].Src = SrcSubscripts[i];
+ Pair[i].Dst = DstSubscripts[i];
+ unifySubscriptType(&Pair[i]);
+
+ // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
+ // delinearization has found, and add these constraints to the dependence
+ // check to avoid memory accesses overflow from one dimension into another.
+ // This is related to the problem of determining the existence of data
+ // dependences in array accesses using a different number of subscripts: in
+ // C one can access an array declared A[100][100] as A[0][9999], *A[9999], etc.
+ }
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// For debugging purposes, dump a small bit vector to dbgs().
+static void dumpSmallBitVector(SmallBitVector &BV) {
+ dbgs() << "{";
+ for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+ dbgs() << VI;
+ if (BV.find_next(VI) >= 0)
+ dbgs() << ' ';
+ }
+ dbgs() << "}\n";
+}
+#endif
+
+// depends -
+// Returns NULL if there is no dependence.
+// Otherwise, return a Dependence with as many details as possible.
+// Corresponds to Section 3.1 in the paper
+//
+// Practical Dependence Testing
+// Goff, Kennedy, Tseng
+// PLDI 1991
+//
+// Care is required to keep the routine below, getSplitIteration(),
+// up to date with respect to this routine.
+std::unique_ptr<Dependence>
+DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
+ bool PossiblyLoopIndependent) {
+ if (Src == Dst)
+ PossiblyLoopIndependent = false;
+
+ if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
+ (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
+ // if neither instruction references memory, there's no dependence
+ return nullptr;
+
+ if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
+ // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+ DEBUG(dbgs() << "can only handle simple loads and stores\n");
+ return make_unique<Dependence>(Src, Dst);
+ }
+
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+
+ switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr)) {
+ case MayAlias:
+ case PartialAlias:
+ // Cannot analyze objects if we don't understand their aliasing.
+ DEBUG(dbgs() << "can't analyze may or partial alias\n");
+ return make_unique<Dependence>(Src, Dst);
+ case NoAlias:
+ // If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "no alias\n");
+ return nullptr;
+ case MustAlias:
+ break; // The underlying objects alias; test accesses for dependence.
+ }
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+ DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
+ DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
+
+ FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+ ++TotalArrayPairs;
+
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
+ DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
+
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
+ SmallVector<Subscript, 4> Pair(Pairs);
+ if (UsefulGEP) {
+ DEBUG(dbgs() << " using GEPs\n");
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ unifySubscriptType(&Pair[P]);
+ }
+ }
+ else {
+ DEBUG(dbgs() << " ignoring GEPs\n");
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
+ DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ if (Delinearize && CommonLevels > 1) {
+ if (tryDelinearize(Src, Dst, Pair)) {
+ DEBUG(dbgs() << " delinerized GEP\n");
+ Pairs = Pair.size();
+ }
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
+ DEBUG(dbgs() << " subscript " << P << "\n");
+ DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
+ DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
+ DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
+ DEBUG(dbgs() << "\tloops = ");
+ DEBUG(dumpSmallBitVector(Pair[P].Loops));
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // Partition subscripts into separable and minimally-coupled groups
+ // The algorithm in the paper is asymptotically better;
+ // this may be faster in practice. Check someday.
+ //
+ // Here's an example of how it works. Consider this code:
+ //
+ // for (i = ...) {
+ // for (j = ...) {
+ // for (k = ...) {
+ // for (l = ...) {
+ // for (m = ...) {
+ // A[i][j][k][m] = ...;
+ // ... = A[0][j][l][i + j];
+ // }
+ // }
+ // }
+ // }
+ // }
+ //
+ // There are 4 subscripts here:
+ // 0 [i] and [0]
+ // 1 [j] and [j]
+ // 2 [k] and [l]
+ // 3 [m] and [i + j]
+ //
+ // We've already classified each subscript pair as ZIV, SIV, etc.,
+ // and collected all the loops mentioned by pair P in Pair[P].Loops.
+ // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
+ // and set Pair[P].Group = {P}.
+ //
+ //      Src  Dst      Classification  Loops    GroupLoops  Group
+ //  0   [i]  [0]      SIV             {1}      {1}         {0}
+ //  1   [j]  [j]      SIV             {2}      {2}         {1}
+ //  2   [k]  [l]      RDIV            {3,4}    {3,4}       {2}
+ //  3   [m]  [i + j]  MIV             {1,2,5}  {1,2,5}     {3}
+ //
+ // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
+ // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
+ //
+ // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
+ // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
+ // Next, 0 and 3. The intersection of their GroupLoops = {1}, not empty,
+ // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
+ // to either Separable or Coupled).
+ //
+ // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
+ // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
+ // so Pair[3].Group = {0, 1, 3} and Done = false.
+ //
+ // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
+ // Since Done remains true, we add 2 to the set of Separable pairs.
+ //
+ // Finally, we consider 3. There's nothing to compare it with,
+ // so Done remains true and we add it to the Coupled set.
+ // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
+ //
+ // In the end, we've got 1 separable subscript and 1 coupled group.
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ ++NonlinearSubscriptPairs;
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ } else if (Pair[SI].Classification == Subscript::ZIV) {
+ // always separable
+ Separable.set(SI);
+ }
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1) {
+ Separable.set(SI);
+ ++SeparableSubscriptPairs;
+ }
+ else {
+ Coupled.set(SI);
+ ++CoupledSubscriptPairs;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Separable = ");
+ DEBUG(dumpSmallBitVector(Separable));
+ DEBUG(dbgs() << " Coupled = ");
+ DEBUG(dumpSmallBitVector(Coupled));
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript " << SI);
+ switch (Pair[SI].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << ", ZIV\n");
+ if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return nullptr;
+ break;
+ case Subscript::SIV: {
+ DEBUG(dbgs() << ", SIV\n");
+ unsigned Level;
+ const SCEV *SplitIter = nullptr;
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
+ SplitIter))
+ return nullptr;
+ break;
+ }
+ case Subscript::RDIV:
+ DEBUG(dbgs() << ", RDIV\n");
+ if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
+ return nullptr;
+ break;
+ case Subscript::MIV:
+ DEBUG(dbgs() << ", MIV\n");
+ if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
+ return nullptr;
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ DEBUG(dbgs() << "starting on coupled subscripts\n");
+ DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ DEBUG(dbgs() << "testing subscript group " << SI << " { ");
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ SmallVector<Subscript *, 4> PairsInGroup;
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ DEBUG(dbgs() << SJ << " ");
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ PairsInGroup.push_back(&Pair[SJ]);
+ }
+ unifySubscriptType(PairsInGroup);
+ DEBUG(dbgs() << "}\n");
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = nullptr;
+ DEBUG(dbgs() << "SIV\n");
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
+ SplitIter))
+ return nullptr;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
+ if (Constraints[Level].isEmpty()) {
+ ++DeltaIndependence;
+ return nullptr;
+ }
+ Changed = true;
+ }
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ DEBUG(dbgs() << " propagating\n");
+ DEBUG(dbgs() << "\tMivs = ");
+ DEBUG(dumpSmallBitVector(Mivs));
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
+ Constraints, Result.Consistent)) {
+ DEBUG(dbgs() << "\t Changed\n");
+ ++DeltaPropagations;
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ DEBUG(dbgs() << "ZIV\n");
+ if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return nullptr;
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+
+ // test & propagate remaining RDIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::RDIV) {
+ DEBUG(dbgs() << "RDIV test\n");
+ if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
+ return nullptr;
+ // I don't yet understand how to propagate RDIV results
+ Mivs.reset(SJ);
+ }
+ }
+
+ // test remaining MIVs
+ // This code is temporary.
+ // Better to somehow test all remaining subscripts simultaneously.
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::MIV) {
+ DEBUG(dbgs() << "MIV test\n");
+ if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
+ return nullptr;
+ }
+ else
+ llvm_unreachable("expected only MIV subscripts at this point");
+ }
+
+ // update Result.DV from constraint vector
+ DEBUG(dbgs() << " updating\n");
+ for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
+ SJ = ConstrainedLevels.find_next(SJ)) {
+ if (SJ > (int)CommonLevels)
+ break;
+ updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
+ if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
+ return nullptr;
+ }
+ }
+ }
+
+ // Make sure the Scalar flags are set correctly.
+ SmallBitVector CompleteLoops(MaxLevels + 1);
+ for (unsigned SI = 0; SI < Pairs; ++SI)
+ CompleteLoops |= Pair[SI].Loops;
+ for (unsigned II = 1; II <= CommonLevels; ++II)
+ if (CompleteLoops[II])
+ Result.DV[II - 1].Scalar = false;
+
+ if (PossiblyLoopIndependent) {
+ // Make sure the LoopIndependent flag is set correctly.
+ // All directions must include equal, otherwise no
+ // loop-independent dependence is possible.
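+ // (A loop-independent dependence relates accesses made in the same
+ // iteration of every common loop, so the EQ direction must remain
+ // possible at every level.)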
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
+ Result.LoopIndependent = false;
+ break;
+ }
+ }
+ }
+ else {
+ // On the other hand, if all directions are equal and there's no
+ // loop-independent dependence possible, then no dependence exists.
+ bool AllEqual = true;
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
+ AllEqual = false;
+ break;
+ }
+ }
+ if (AllEqual)
+ return nullptr;
+ }
+
+ return make_unique<FullDependence>(std::move(Result));
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// getSplitIteration -
+// Rather than spend space recording the (rarely used) splitting iteration
+// during the Weak-Crossing SIV test, we re-compute it on demand.
+// The re-computation is basically a repeat of the entire dependence test,
+// though simplified since we know that the dependence exists.
+// It's tedious, since we must go through all propagations, etc.
+//
+// Care is required to keep this code up to date with respect to the routine
+// above, depends().
+//
+// Generally, the dependence analyzer will be used to build
+// a dependence graph for a function (basically a map from instructions
+// to dependences). Looking for cycles in the graph shows us loops
+// that cannot be trivially vectorized/parallelized.
+//
+// We can try to improve the situation by examining all the dependences
+// that make up the cycle, looking for ones we can break.
+// Sometimes, peeling the first or last iteration of a loop will break
+// dependences, and we've got flags for those possibilities.
+// Sometimes, splitting a loop at some other iteration will do the trick,
+// and we've got a flag for that case. Rather than waste the space to
+// record the exact iteration (since we rarely know), we provide
+// a method that calculates the iteration. It's a drag that it must work
+// from scratch, but wonderful in that it's possible.
+//
+// Here's an example:
+//
+// for (i = 0; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// There's a loop-carried flow dependence from the store to the load,
+// found by the weak-crossing SIV test. The dependence will have a flag,
+// indicating that the dependence can be broken by splitting the loop.
+// Calling getSplitIteration will return 5.
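+// (For a weak-crossing SIV pair a*i + c1 versus -a*i + c2, the accesses
+// cross at i = (c2 - c1)/(2*a); here that's (11 - 0)/2 = 5.5, and splitting
+// after iteration floor(5.5) = 5 separates the crossing accesses.)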
+// Splitting the loop breaks the dependence, like so:
+//
+// for (i = 0; i <= 5; i++)
+// A[i] = ...
+// ... = A[11 - i]
+// for (i = 6; i < 10; i++)
+// A[i] = ...
+// ... = A[11 - i]
+//
+// Neither loop now carries the dependence, so each can be
+// vectorized/parallelized on its own.
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
+ unsigned SplitLevel) {
+ assert(Dep.isSplitable(SplitLevel) &&
+ "Dep should be splitable at SplitLevel");
+ Instruction *Src = Dep.getSrc();
+ Instruction *Dst = Dep.getDst();
+ assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
+ assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
+ assert(isLoadOrStore(Src));
+ assert(isLoadOrStore(Dst));
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
+ assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr) == MustAlias);
+
+ // establish loop nesting levels
+ establishNestingLevels(Src, Dst);
+
+ FullDependence Result(Src, Dst, false, CommonLevels);
+
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
+ SmallVector<Subscript, 4> Pair(Pairs);
+ if (UsefulGEP) {
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ if (Delinearize && CommonLevels > 1) {
+ if (tryDelinearize(Src, Dst, Pair)) {
+ DEBUG(dbgs() << " delinerized GEP\n");
+ Pairs = Pair.size();
+ }
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
+ }
+
+ SmallBitVector Separable(Pairs);
+ SmallBitVector Coupled(Pairs);
+
+ // partition subscripts into separable and minimally-coupled groups
+ for (unsigned SI = 0; SI < Pairs; ++SI) {
+ if (Pair[SI].Classification == Subscript::NonLinear) {
+ // ignore these, but collect loops for later
+ collectCommonLoops(Pair[SI].Src,
+ LI->getLoopFor(Src->getParent()),
+ Pair[SI].Loops);
+ collectCommonLoops(Pair[SI].Dst,
+ LI->getLoopFor(Dst->getParent()),
+ Pair[SI].Loops);
+ Result.Consistent = false;
+ }
+ else if (Pair[SI].Classification == Subscript::ZIV)
+ Separable.set(SI);
+ else {
+ // SIV, RDIV, or MIV, so check for coupled group
+ bool Done = true;
+ for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
+ SmallBitVector Intersection = Pair[SI].GroupLoops;
+ Intersection &= Pair[SJ].GroupLoops;
+ if (Intersection.any()) {
+ // accumulate set of all the loops in group
+ Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
+ // accumulate set of all subscripts in group
+ Pair[SJ].Group |= Pair[SI].Group;
+ Done = false;
+ }
+ }
+ if (Done) {
+ if (Pair[SI].Group.count() == 1)
+ Separable.set(SI);
+ else
+ Coupled.set(SI);
+ }
+ }
+ }
+
+ Constraint NewConstraint;
+ NewConstraint.setAny(SE);
+
+ // test separable subscripts
+ for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ switch (Pair[SI].Classification) {
+ case Subscript::SIV: {
+ unsigned Level;
+ const SCEV *SplitIter = nullptr;
+ (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel) {
+ assert(SplitIter != nullptr);
+ return SplitIter;
+ }
+ break;
+ }
+ case Subscript::ZIV:
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("subscript has unexpected classification");
+ }
+ }
+
+ if (Coupled.count()) {
+ // test coupled subscript groups
+ SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
+ for (unsigned II = 0; II <= MaxLevels; ++II)
+ Constraints[II].setAny(SE);
+ for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ SmallBitVector Group(Pair[SI].Group);
+ SmallBitVector Sivs(Pairs);
+ SmallBitVector Mivs(Pairs);
+ SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ if (Pair[SJ].Classification == Subscript::SIV)
+ Sivs.set(SJ);
+ else
+ Mivs.set(SJ);
+ }
+ while (Sivs.any()) {
+ bool Changed = false;
+ for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ // SJ is an SIV subscript that's part of the current coupled group
+ unsigned Level;
+ const SCEV *SplitIter = nullptr;
+ (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
+ Result, NewConstraint, SplitIter);
+ if (Level == SplitLevel && SplitIter)
+ return SplitIter;
+ ConstrainedLevels.set(Level);
+ if (intersectConstraints(&Constraints[Level], &NewConstraint))
+ Changed = true;
+ Sivs.reset(SJ);
+ }
+ if (Changed) {
+ // propagate, possibly creating new SIVs and ZIVs
+ for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ // SJ is an MIV subscript that's part of the current coupled group
+ if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
+ Pair[SJ].Loops, Constraints, Result.Consistent)) {
+ Pair[SJ].Classification =
+ classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
+ Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[SJ].Loops);
+ switch (Pair[SJ].Classification) {
+ case Subscript::ZIV:
+ Mivs.reset(SJ);
+ break;
+ case Subscript::SIV:
+ Sivs.set(SJ);
+ Mivs.reset(SJ);
+ break;
+ case Subscript::RDIV:
+ case Subscript::MIV:
+ break;
+ default:
+ llvm_unreachable("bad subscript classification");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ llvm_unreachable("somehow reached end of routine");
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
new file mode 100644
index 0000000..5ae6d74
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -0,0 +1,320 @@
+//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements divergence analysis, which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as
+// jump threading and loop unswitching make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can unfortunately be harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. It is intra-procedural: it conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. It treats memory as a black box: it conservatively considers values
+// loaded from the generic or local address space as divergent. This can be
+// improved by leveraging pointer analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <vector>
+using namespace llvm;
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(TerminatorInst *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any simple path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+ // Finds all users of I that are outside the influence region, and adds
+ // these users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+ BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second)
+ Worklist.push_back(&*I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+ // searching every basic block in the influence region, we search only the
+ // dominators of TI, stopping once we leave the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
+// to the influence region.
+static void
+addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion,
+ std::vector<BasicBlock *> &InfluenceStack) {
+ for (BasicBlock *Succ : successors(ThisBB)) {
+ if (Succ != End && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+
+ // The influence region spans from the end of "Start" to the beginning of
+ // "End". Therefore, "Start" should not be in the region unless "Start" is
+ // in a loop that doesn't contain "End".
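+ //
+ // Illustratively, for a diamond-shaped CFG
+ //
+ //        Start
+ //        /   \
+ //       T     F
+ //        \   /
+ //         End
+ //
+ // the computed region is {T, F}: the code below seeds the region with
+ // Start's successors (excluding End) and then grows it depth-first.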
+ std::vector<BasicBlock *> InfluenceStack;
+ addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
+ // Terminators with fewer than two successors can't introduce a sync
+ // dependency. Ignore them.
+ if (TI->getNumSuccessors() > 1)
+ exploreSyncDependency(TI);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // end anonymous namespace
+
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
+FunctionPass *llvm::createDivergenceAnalysisPass() {
+ return new DivergenceAnalysis();
+}
+
+void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+}
+
+bool DivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ DivergencePropagator DP(F, TTI,
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<PostDominatorTree>(), DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ return false;
+}
+
+void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if (DivergentValues.empty())
+ return;
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ const Function *F;
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+
+ // Dump all divergent values in F: arguments first, then instructions.
+ for (auto &Arg : F->args()) {
+ if (DivergentValues.count(&Arg))
+ OS << "DIVERGENT: " << Arg << "\n";
+ }
+ // Iterate instructions using instructions() to ensure a deterministic order.
+ for (auto &I : instructions(F)) {
+ if (DivergentValues.count(&I))
+ OS << "DIVERGENT:" << I << "\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
new file mode 100644
index 0000000..0c880df
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -0,0 +1,254 @@
+//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
+// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
+// program, with a graph of the dominance/postdominance tree of that
+// function.
+//
+// There are also passes available to directly call dotty ('-view-dom' or
+// '-view-postdom'). By appending '-only', as in '-dot-dom-only', only the
+// names of the basic blocks are printed; their contents are hidden.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+
+using namespace llvm;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+ BasicBlock *BB = Node->getBlock();
+
+ if (!BB)
+ return "Post dominance root node";
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+};
+
+template<>
+struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(DominatorTree *DT) {
+ return "Dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree*>
+ : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(PostDominatorTree *DT) {
+ return "Post dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+}
+
+namespace {
+struct DominatorTreeWrapperPassAnalysisGraphTraits {
+ static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
+ return &DTWP->getDomTree();
+ }
+};
+
+struct DomViewer : public DOTGraphTraitsViewer<
+ DominatorTreeWrapperPass, false, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits> {
+ static char ID;
+ DomViewer()
+ : DOTGraphTraitsViewer<DominatorTreeWrapperPass, false, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits>(
+ "dom", ID) {
+ initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyViewer : public DOTGraphTraitsViewer<
+ DominatorTreeWrapperPass, true, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits> {
+ static char ID;
+ DomOnlyViewer()
+ : DOTGraphTraitsViewer<DominatorTreeWrapperPass, true, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits>(
+ "domonly", ID) {
+ initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, false> {
+ static char ID;
+ PostDomViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+ initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyViewer
+ : public DOTGraphTraitsViewer<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyViewer() :
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+ initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+char DomViewer::ID = 0;
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false)
+
+char DomOnlyViewer::ID = 0;
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false)
+
+char PostDomViewer::ID = 0;
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false)
+
+char PostDomOnlyViewer::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false)
+
+namespace {
+struct DomPrinter : public DOTGraphTraitsPrinter<
+ DominatorTreeWrapperPass, false, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits> {
+ static char ID;
+ DomPrinter()
+ : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, false, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits>(
+ "dom", ID) {
+ initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct DomOnlyPrinter : public DOTGraphTraitsPrinter<
+ DominatorTreeWrapperPass, true, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits> {
+ static char ID;
+ DomOnlyPrinter()
+ : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, true, DominatorTree *,
+ DominatorTreeWrapperPassAnalysisGraphTraits>(
+ "domonly", ID) {
+ initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
+ static char ID;
+ PostDomPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+ initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct PostDomOnlyPrinter
+ : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
+ static char ID;
+ PostDomOnlyPrinter() :
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+ initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+} // end anonymous namespace
+
+
+
+char DomPrinter::ID = 0;
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false)
+
+char DomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+char PostDomPrinter::ID = 0;
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false)
+
+char PostDomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false)
+
+// Create methods available outside of this file, so they can be referenced
+// from "include/llvm/LinkAllPasses.h". Otherwise the passes would be
+// stripped out by link-time optimization.
+
+FunctionPass *llvm::createDomPrinterPass() {
+ return new DomPrinter();
+}
+
+FunctionPass *llvm::createDomOnlyPrinterPass() {
+ return new DomOnlyPrinter();
+}
+
+FunctionPass *llvm::createDomViewerPass() {
+ return new DomViewer();
+}
+
+FunctionPass *llvm::createDomOnlyViewerPass() {
+ return new DomOnlyViewer();
+}
+
+FunctionPass *llvm::createPostDomPrinterPass() {
+ return new PostDomPrinter();
+}
+
+FunctionPass *llvm::createPostDomOnlyPrinterPass() {
+ return new PostDomOnlyPrinter();
+}
+
+FunctionPass *llvm::createPostDomViewerPass() {
+ return new PostDomViewer();
+}
+
+FunctionPass *llvm::createPostDomOnlyViewerPass() {
+ return new PostDomOnlyViewer();
+}
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 0000000..7ba91bc
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,57 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/DominanceFrontierImpl.h"
+
+using namespace llvm;
+
+namespace llvm {
+template class DominanceFrontierBase<BasicBlock>;
+template class ForwardDominanceFrontierBase<BasicBlock>;
+}
+
+char DominanceFrontier::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+
+DominanceFrontier::DominanceFrontier()
+ : FunctionPass(ID),
+ Base() {
+ initializeDominanceFrontierPass(*PassRegistry::getPassRegistry());
+}
+
+void DominanceFrontier::releaseMemory() {
+ Base.releaseMemory();
+}
+
+bool DominanceFrontier::runOnFunction(Function &) {
+ releaseMemory();
+ Base.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ return false;
+}
+
+void DominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
+void DominanceFrontier::print(raw_ostream &OS, const Module *) const {
+ Base.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void DominanceFrontier::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
new file mode 100644
index 0000000..01be8b3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -0,0 +1,106 @@
+//===- EHPersonalities.cpp - Compute EH-related information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// See if the given exception handling personality function is one that we
+/// understand. If so, return a description of it; otherwise return Unknown.
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
+ const Function *F =
+ Pers ? dyn_cast<Function>(Pers->stripPointerCasts()) : nullptr;
+ if (!F)
+ return EHPersonality::Unknown;
+ return StringSwitch<EHPersonality>(F->getName())
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Case("ProcessCLRException", EHPersonality::CoreCLR)
+ .Default(EHPersonality::Unknown);
+}
+
+bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
+ EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
+ // We can't simplify any invokes to nounwind functions if the personality
+ // function wants to catch asynchronous exceptions. The nounwind attribute only
+ // implies that the function does not throw synchronous exceptions.
+ return !isAsynchronousEHPersonality(Personality);
+}
+
+DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 16> Worklist;
+ BasicBlock *EntryBlock = &F.getEntryBlock();
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+
+ // Build up the color map, which maps each block to its set of 'colors'.
+ // For any block B the "colors" of B are the set of funclets F (possibly
+ // including a root "funclet" representing the main function) such that
+ // F will need to directly contain B or a copy of B (where the term "directly
+ // contain" is used to distinguish from being "transitively contained" in
+ // a nested funclet).
+ //
+ // Note: Despite not being a funclet in the truest sense, a catchswitch is
+ // considered to belong to its own funclet for the purposes of coloring.
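+ //
+ // For example, a block reachable both from the entry and from within a
+ // catch funclet receives both colors; consumers of this coloring (such as
+ // WinEHPrepare) are then expected to clone the block so that each funclet
+ // gets its own copy.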
+
+ DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for "
+ << F.getName() << "\n");
+
+ Worklist.push_back({EntryBlock, EntryBlock});
+
+ while (!Worklist.empty()) {
+ BasicBlock *Visiting;
+ BasicBlock *Color;
+ std::tie(Visiting, Color) = Worklist.pop_back_val();
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << "Visiting " << Visiting->getName() << ", "
+ << Color->getName() << "\n");
+ Instruction *VisitingHead = Visiting->getFirstNonPHI();
+ if (VisitingHead->isEHPad()) {
+ // Mark this funclet head as a member of itself.
+ Color = Visiting;
+ }
+ // Note that this is a member of the given color.
+ ColorVector &Colors = BlockColors[Visiting];
+ if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end())
+ Colors.push_back(Color);
+ else
+ continue;
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << Color->getName()
+ << "\' to block \'" << Visiting->getName()
+ << "\'.\n");
+
+ BasicBlock *SuccColor = Color;
+ TerminatorInst *Terminator = Visiting->getTerminator();
+ if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) {
+ Value *ParentPad = CatchRet->getParentPad();
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccColor = EntryBlock;
+ else
+ SuccColor = cast<Instruction>(ParentPad)->getParent();
+ }
+
+ for (BasicBlock *Succ : successors(Visiting))
+ Worklist.push_back({Succ, SuccColor});
+ }
+ return BlockColors;
+}
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
new file mode 100644
index 0000000..249f395
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -0,0 +1,987 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure"). For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "globalsmodref-aa"
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+ "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+// An option to enable unsafe alias results from the GlobalsModRef analysis.
+// When enabled, GlobalsModRef will provide no-alias results which in extremely
+// rare cases may not be conservatively correct. In particular, in the face of
+// transforms which cause asymmetry between how effective GetUnderlyingObject
+// is for two pointers, it may produce incorrect results.
+//
+// These unsafe results have been returned by GMR for many years without
+// causing significant issues in the wild and so we provide a mechanism to
+// re-enable them for users of LLVM that have a particular performance
+// sensitivity and no known issues. The option also makes it easy to evaluate
+// the performance impact of these results.
+static cl::opt<bool> EnableUnsafeGlobalsModRefAliasResults(
+ "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden);
+
+/// The mod/ref information collected for a particular function.
+///
+/// We collect information about mod/ref behavior of a function here, both in
+/// general and as pertains to specific globals. We only have this detailed
+/// information when we know *something* useful about the behavior. If we
+/// saturate to fully general mod/ref, we remove the info for the function.
+class GlobalsAAResult::FunctionInfo {
+ typedef SmallDenseMap<const GlobalValue *, ModRefInfo, 16> GlobalInfoMapType;
+
+ /// Build a wrapper struct that has 8-byte alignment. All heap allocations
+ /// should provide this much alignment at least, but this makes it clear we
+ /// specifically rely on this amount of alignment.
+ struct LLVM_ALIGNAS(8) AlignedMap {
+ AlignedMap() {}
+ AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {}
+ GlobalInfoMapType Map;
+ };
+
+ /// Pointer traits for our aligned map.
+ struct AlignedMapPointerTraits {
+ static inline void *getAsVoidPointer(AlignedMap *P) { return P; }
+ static inline AlignedMap *getFromVoidPointer(void *P) {
+ return (AlignedMap *)P;
+ }
+ enum { NumLowBitsAvailable = 3 };
+ static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable),
+ "AlignedMap insufficiently aligned to have enough low bits.");
+ };
+
+ /// The bit that flags that this function may read any global. This is
+ /// chosen to mix together with ModRefInfo bits.
+ enum { MayReadAnyGlobal = 4 };
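+ // (Net layout of the packed int, as the static_asserts below document:
+ // bits 0-1 hold the ModRefInfo and bit 2 holds the MayReadAnyGlobal flag.)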
+
+ /// Checks to document the invariants of the bit packing here.
+ static_assert((MayReadAnyGlobal & MRI_ModRef) == 0,
+ "ModRef and the MayReadAnyGlobal flag bits overlap.");
+ static_assert(((MayReadAnyGlobal | MRI_ModRef) >>
+ AlignedMapPointerTraits::NumLowBitsAvailable) == 0,
+ "Insufficient low bits to store our flag and ModRef info.");
+
+public:
+ FunctionInfo() : Info() {}
+ ~FunctionInfo() {
+ delete Info.getPointer();
+ }
+ // Spell out the copy and move constructors and assignment operators to get
+ // deep copy semantics and correct move semantics in the face of the
+ // pointer-int pair.
+ FunctionInfo(const FunctionInfo &Arg)
+ : Info(nullptr, Arg.Info.getInt()) {
+ if (const auto *ArgPtr = Arg.Info.getPointer())
+ Info.setPointer(new AlignedMap(*ArgPtr));
+ }
+ FunctionInfo(FunctionInfo &&Arg)
+ : Info(Arg.Info.getPointer(), Arg.Info.getInt()) {
+ Arg.Info.setPointerAndInt(nullptr, 0);
+ }
+ FunctionInfo &operator=(const FunctionInfo &RHS) {
+ delete Info.getPointer();
+ Info.setPointerAndInt(nullptr, RHS.Info.getInt());
+ if (const auto *RHSPtr = RHS.Info.getPointer())
+ Info.setPointer(new AlignedMap(*RHSPtr));
+ return *this;
+ }
+ FunctionInfo &operator=(FunctionInfo &&RHS) {
+ delete Info.getPointer();
+ Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt());
+ RHS.Info.setPointerAndInt(nullptr, 0);
+ return *this;
+ }
+
+ /// Returns the \c ModRefInfo for this function.
+ ModRefInfo getModRefInfo() const {
+ return ModRefInfo(Info.getInt() & MRI_ModRef);
+ }
+
+ /// Adds new \c ModRefInfo for this function to its state.
+ void addModRefInfo(ModRefInfo NewMRI) {
+ Info.setInt(Info.getInt() | NewMRI);
+ }
+
+ /// Returns whether this function may read some global variable, without
+ /// knowing which one.
+ bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; }
+
+ /// Sets this function as potentially reading from any global.
+ void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); }
+
+ /// Returns the \c ModRefInfo for this function w.r.t. a particular
+ /// global, which may be more precise than the general information above.
+ ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const {
+ ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef;
+ if (AlignedMap *P = Info.getPointer()) {
+ auto I = P->Map.find(&GV);
+ if (I != P->Map.end())
+ GlobalMRI = ModRefInfo(GlobalMRI | I->second);
+ }
+ return GlobalMRI;
+ }
+
+ /// Add mod/ref info from another function into ours, saturating towards
+ /// MRI_ModRef.
+ void addFunctionInfo(const FunctionInfo &FI) {
+ addModRefInfo(FI.getModRefInfo());
+
+ if (FI.mayReadAnyGlobal())
+ setMayReadAnyGlobal();
+
+ if (AlignedMap *P = FI.Info.getPointer())
+ for (const auto &G : P->Map)
+ addModRefInfoForGlobal(*G.first, G.second);
+ }
+
+ void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) {
+ AlignedMap *P = Info.getPointer();
+ if (!P) {
+ P = new AlignedMap();
+ Info.setPointer(P);
+ }
+ auto &GlobalMRI = P->Map[&GV];
+ GlobalMRI = ModRefInfo(GlobalMRI | NewMRI);
+ }
+
+ /// Clear a global's ModRef info. Should be used when a global is being
+ /// deleted.
+ void eraseModRefInfoForGlobal(const GlobalValue &GV) {
+ if (AlignedMap *P = Info.getPointer())
+ P->Map.erase(&GV);
+ }
+
+private:
+ /// All of the information is encoded into a single pointer, with a three-bit
+ /// integer packed into its low three bits. The high bit of the three flags
+ /// whether this function may read any global; the low two bits are the
+ /// ModRefInfo. The pointer, when non-null, points to a map from GlobalValue
+ /// to ModRefInfo specific to that GlobalValue.
+ PointerIntPair<AlignedMap *, 3, unsigned, AlignedMapPointerTraits> Info;
+};
+
+void GlobalsAAResult::DeletionCallbackHandle::deleted() {
+ Value *V = getValPtr();
+ if (auto *F = dyn_cast<Function>(V))
+ GAR->FunctionInfos.erase(F);
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (GAR->NonAddressTakenGlobals.erase(GV)) {
+ // This global might be an indirect global. If so, remove it and
+ // remove any AllocRelatedValues for it.
+ if (GAR->IndirectGlobals.erase(GV)) {
+ // Remove any entries in AllocsForIndirectGlobals for this global.
+ for (auto I = GAR->AllocsForIndirectGlobals.begin(),
+ E = GAR->AllocsForIndirectGlobals.end();
+ I != E; ++I)
+ if (I->second == GV)
+ GAR->AllocsForIndirectGlobals.erase(I);
+ }
+
+ // Scan the function info we have collected and remove this global
+ // from all of them.
+ for (auto &FIPair : GAR->FunctionInfos)
+ FIPair.second.eraseModRefInfoForGlobal(*GV);
+ }
+ }
+
+ // If this is an allocation related to an indirect global, remove it.
+ GAR->AllocsForIndirectGlobals.erase(V);
+
+ // And clear out the handle.
+ setValPtr(nullptr);
+ GAR->Handles.erase(I);
+ // This object is now destroyed!
+}
+
+FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ if (FunctionInfo *FI = getFunctionInfo(F)) {
+ if (FI->getModRefInfo() == MRI_NoModRef)
+ Min = FMRB_DoesNotAccessMemory;
+ else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ Min = FMRB_OnlyReadsMemory;
+ }
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+}
+
+FunctionModRefBehavior
+GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ if (const Function *F = CS.getCalledFunction())
+ if (FunctionInfo *FI = getFunctionInfo(F)) {
+ if (FI->getModRefInfo() == MRI_NoModRef)
+ Min = FMRB_DoesNotAccessMemory;
+ else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+ Min = FMRB_OnlyReadsMemory;
+ }
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+/// Returns the function info for the function, or null if we don't have
+/// anything useful to say about it.
+GlobalsAAResult::FunctionInfo *
+GlobalsAAResult::getFunctionInfo(const Function *F) {
+ auto I = FunctionInfos.find(F);
+ if (I != FunctionInfos.end())
+ return &I->second;
+ return nullptr;
+}
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValues in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
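+/// For example, an internal function that is only ever the callee of direct
+/// calls is not address-taken, while one whose address is stored into a
+/// function-pointer table is.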
+void GlobalsAAResult::AnalyzeGlobals(Module &M) {
+ SmallPtrSet<Function *, 64> TrackedFunctions;
+ for (Function &F : M)
+ if (F.hasLocalLinkage())
+ if (!AnalyzeUsesOfPointer(&F)) {
+ // Remember that we are tracking this global.
+ NonAddressTakenGlobals.insert(&F);
+ TrackedFunctions.insert(&F);
+ Handles.emplace_front(*this, &F);
+ Handles.front().I = Handles.begin();
+ ++NumNonAddrTakenFunctions;
+ }
+
+ SmallPtrSet<Function *, 64> Readers, Writers;
+ for (GlobalVariable &GV : M.globals())
+ if (GV.hasLocalLinkage()) {
+ if (!AnalyzeUsesOfPointer(&GV, &Readers,
+ GV.isConstant() ? nullptr : &Writers)) {
+ // Remember that we are tracking this global, and the mod/ref fns
+ NonAddressTakenGlobals.insert(&GV);
+ Handles.emplace_front(*this, &GV);
+ Handles.front().I = Handles.begin();
+
+ for (Function *Reader : Readers) {
+ if (TrackedFunctions.insert(Reader).second) {
+ Handles.emplace_front(*this, Reader);
+ Handles.front().I = Handles.begin();
+ }
+ FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref);
+ }
+
+ if (!GV.isConstant()) // No need to keep track of writers to constants
+ for (Function *Writer : Writers) {
+ if (TrackedFunctions.insert(Writer).second) {
+ Handles.emplace_front(*this, Writer);
+ Handles.front().I = Handles.begin();
+ }
+ FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod);
+ }
+ ++NumNonAddrTakenGlobalVars;
+
+ // If this global holds a pointer type, see if it is an indirect global.
+ if (GV.getType()->getElementType()->isPointerTy() &&
+ AnalyzeIndirectGlobalMemory(&GV))
+ ++NumIndirectGlobalVars;
+ }
+ Readers.clear();
+ Writers.clear();
+ }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read
+/// from and write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
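+///
+/// For instance, loading through the pointer is a simple (read) use, while
+/// storing the pointer itself somewhere else counts as an escape.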
+bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
+ SmallPtrSetImpl<Function *> *Readers,
+ SmallPtrSetImpl<Function *> *Writers,
+ GlobalValue *OkayStoreDest) {
+ if (!V->getType()->isPointerTy())
+ return true;
+
+ for (Use &U : V->uses()) {
+ User *I = U.getUser();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (Readers)
+ Readers->insert(LI->getParent()->getParent());
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (V == SI->getOperand(1)) {
+ if (Writers)
+ Writers->insert(SI->getParent()->getParent());
+ } else if (SI->getOperand(1) != OkayStoreDest) {
+ return true; // Storing the pointer
+ }
+ } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
+ if (AnalyzeUsesOfPointer(I, Readers, Writers))
+ return true;
+ } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+ if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
+ return true;
+ } else if (auto CS = CallSite(I)) {
+ // Make sure that this is just the function being called, not that the
+ // pointer is being passed into the function.
+ if (CS.isDataOperand(&U)) {
+ // Detect calls to free.
+ if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Writers)
+ Writers->insert(CS->getParent()->getParent());
+ } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
+ Function *ParentF = CS->getParent()->getParent();
+ // A nocapture argument may be read from or written to, but does not
+ // escape unless the call can somehow recurse.
+ //
+ // nocapture "indicates that the callee does not make any copies of
+ // the pointer that outlive itself". Therefore if we directly or
+ // indirectly recurse, we must treat the pointer as escaping.
+ if (FunctionToSCCMap[ParentF] ==
+ FunctionToSCCMap[CS.getCalledFunction()])
+ return true;
+ if (Readers)
+ Readers->insert(ParentF);
+ if (Writers)
+ Writers->insert(ParentF);
+ } else {
+ return true; // Argument of an unknown call.
+ }
+ }
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+ return true; // Allow comparison against null.
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the global are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
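+///
+/// A minimal sketch of the pattern recognized (hypothetical C, assuming the
+/// global has local linkage):
+/// \code
+///   static int *Buf;                      // non-addr-taken pointer global
+///   void init(void) { Buf = malloc(64); } // only allocations stored to it
+///   int  get(int i) { return Buf[i]; }    // loads use the memory directly
+/// \endcode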
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
+ // Keep track of values related to the allocation of the memory, e.g., the
+ // value produced by the malloc call and any casts.
+ std::vector<Value *> AllocRelatedValues;
+
+ // If the global is initialized to a non-null value, bail.
+ if (Constant *C = GV->getInitializer())
+ if (!C->isNullValue())
+ return false;
+
+ // Walk the user list of the global. If we find anything other than a direct
+ // load or store, bail out.
+ for (User *U : GV->users()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // The pointer loaded from the global can only be used in simple ways:
+ // we allow addressing of it and loading and storing to it. We do *not*
+ // allow storing the loaded pointer somewhere else or passing it to a
+ // function.
+ if (AnalyzeUsesOfPointer(LI))
+ return false; // Loaded pointer escapes.
+ // TODO: Could try some IP mod/ref of the loaded pointer.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ // Storing the global itself.
+ if (SI->getOperand(0) == GV)
+ return false;
+
+ // If storing the null pointer, ignore it.
+ if (isa<ConstantPointerNull>(SI->getOperand(0)))
+ continue;
+
+ // Check the value being stored.
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+ GV->getParent()->getDataLayout());
+
+ if (!isAllocLikeFn(Ptr, &TLI))
+ return false; // Too hard to analyze.
+
+ // Analyze all uses of the allocation. If any of them are used in a
+ // non-simple way (e.g. stored to another global) bail out.
+ if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
+ GV))
+ return false; // Loaded pointer escapes.
+
+ // Remember that this allocation is related to the indirect global.
+ AllocRelatedValues.push_back(Ptr);
+ } else {
+ // Something complex, bail out.
+ return false;
+ }
+ }
+
+ // Okay, this is an indirect global. Remember all of the allocations for
+ // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) {
+ AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+ Handles.emplace_front(*this, AllocRelatedValues.back());
+ Handles.front().I = Handles.begin();
+ AllocRelatedValues.pop_back();
+ }
+ IndirectGlobals.insert(GV);
+ Handles.emplace_front(*this, GV);
+ Handles.front().I = Handles.begin();
+ return true;
+}
+
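+/// CollectSCCMembership - Assign every function in the module an SCC number
+/// from a bottom-up traversal of the call graph. AnalyzeUsesOfPointer later
+/// consults this FunctionToSCCMap to treat nocapture arguments of (mutually)
+/// recursive calls as escaping.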
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ unsigned SCCID = 0;
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ const std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ for (auto *CGN : SCC)
+ if (Function *F = CGN->getFunction())
+ FunctionToSCCMap[F] = SCCID;
+ ++SCCID;
+ }
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+ // We do a bottom-up SCC traversal of the call graph. In other words, we
+ // visit all callees before callers (leaf-first).
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ const std::vector<CallGraphNode *> &SCC = *I;
+ assert(!SCC.empty() && "SCC with no functions?");
+
+ if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) {
+ // The function is external or may be overridden - we can't say anything
+ // useful. Remove any existing function records (they may have been created
+ // when scanning globals).
+ for (auto *Node : SCC)
+ FunctionInfos.erase(Node->getFunction());
+ continue;
+ }
+
+ FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+ bool KnowNothing = false;
+
+ // Collect the mod/ref properties due to called functions. We only compute
+ // one mod-ref set.
+ for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+ Function *F = SCC[i]->getFunction();
+ if (!F) {
+ KnowNothing = true;
+ break;
+ }
+
+ if (F->isDeclaration()) {
+ // Try to get mod/ref behaviour from function attributes.
+ if (F->doesNotAccessMemory()) {
+ // Can't do better than that!
+ } else if (F->onlyReadsMemory()) {
+ FI.addModRefInfo(MRI_Ref);
+ if (!F->isIntrinsic())
+ // This function might call back into the module and read a global -
+ // consider every global as possibly being read by this function.
+ FI.setMayReadAnyGlobal();
+ } else {
+ FI.addModRefInfo(MRI_ModRef);
+ // Can't say anything useful unless it's an intrinsic - they don't
+ // read or write global variables of the kind considered here.
+ KnowNothing = !F->isIntrinsic();
+ }
+ continue;
+ }
+
+ for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+ CI != E && !KnowNothing; ++CI)
+ if (Function *Callee = CI->second->getFunction()) {
+ if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) {
+ // Propagate function effect up.
+ FI.addFunctionInfo(*CalleeFI);
+ } else {
+ // Can't say anything about it. However, if it is inside our SCC,
+ // then nothing needs to be done.
+ CallGraphNode *CalleeNode = CG[Callee];
+ if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ KnowNothing = true;
+ }
+ } else {
+ KnowNothing = true;
+ }
+ }
+
+ // If we can't say anything useful about this SCC, remove all SCC functions
+ // from the FunctionInfos map.
+ if (KnowNothing) {
+ for (auto *Node : SCC)
+ FunctionInfos.erase(Node->getFunction());
+ continue;
+ }
+
+ // Scan the function bodies for explicit loads or stores.
+ for (auto *Node : SCC) {
+ if (FI.getModRefInfo() == MRI_ModRef)
+ break; // The mod/ref lattice saturates here.
+ for (Instruction &I : instructions(Node->getFunction())) {
+ if (FI.getModRefInfo() == MRI_ModRef)
+ break; // The mod/ref lattice saturates here.
+
+ // We handle calls specially because the graph-relevant aspects are
+ // handled above.
+ if (auto CS = CallSite(&I)) {
+ if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+ // FIXME: It is completely unclear why this is necessary and not
+ // handled by the above graph code.
+ FI.addModRefInfo(MRI_ModRef);
+ } else if (Function *Callee = CS.getCalledFunction()) {
+ // The callgraph doesn't include intrinsic calls.
+ if (Callee->isIntrinsic()) {
+ FunctionModRefBehavior Behaviour =
+ AAResultBase::getModRefBehavior(Callee);
+ FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+ }
+ }
+ continue;
+ }
+
+ // For all non-call instructions, we use the primary predicates for
+ // whether they read or write memory.
+ if (I.mayReadFromMemory())
+ FI.addModRefInfo(MRI_Ref);
+ if (I.mayWriteToMemory())
+ FI.addModRefInfo(MRI_Mod);
+ }
+ }
+
+ if ((FI.getModRefInfo() & MRI_Mod) == 0)
+ ++NumReadMemFunctions;
+ if (FI.getModRefInfo() == MRI_NoModRef)
+ ++NumNoMemFunctions;
+
+ // Finally, now that we know the full effect on this SCC, clone the
+ // information to each function in the SCC.
+ // FI is a reference into FunctionInfos, so copy it now so that it doesn't
+ // get invalidated if DenseMap decides to re-hash.
+ FunctionInfo CachedFI = FI;
+ for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+ FunctionInfos[SCC[i]->getFunction()] = CachedFI;
+ }
+}
+
+// GV is a non-escaping global. V is a pointer address that has been loaded from.
+// If we can prove that V must escape, we can conclude that a load from V cannot
+// alias GV.
+static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
+ const Value *V,
+ int &Depth,
+ const DataLayout &DL) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Inputs;
+ Visited.insert(V);
+ Inputs.push_back(V);
+ do {
+ const Value *Input = Inputs.pop_back_val();
+
+ if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
+ isa<InvokeInst>(Input))
+ // Arguments to functions or returns from functions are inherently
+ // escaping, so we can immediately classify those as not aliasing any
+ // non-addr-taken globals.
+ //
+ // (Transitive) loads from a global are also safe - if this aliased
+ // another global, its address would escape, so no alias.
+ continue;
+
+ // Recurse through a limited number of selects, loads and PHIs. This is an
+ // arbitrary depth of 4; lower numbers could be used to fix compile-time
+ // issues if needed, but this is generally expected to only be important
+ // for small depths.
+ if (++Depth > 4)
+ return false;
+
+ if (auto *LI = dyn_cast<LoadInst>(Input)) {
+ Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(Input)) {
+ const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+ const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ if (Visited.insert(LHS).second)
+ Inputs.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Inputs.push_back(RHS);
+ continue;
+ }
+ if (auto *PN = dyn_cast<PHINode>(Input)) {
+ for (const Value *Op : PN->incoming_values()) {
+ Op = GetUnderlyingObject(Op, DL);
+ if (Visited.insert(Op).second)
+ Inputs.push_back(Op);
+ }
+ continue;
+ }
+
+ return false;
+ } while (!Inputs.empty());
+
+ // All inputs were known to be no-alias.
+ return true;
+}
+
+// There are particular cases where we can conclude no-alias between
+// a non-addr-taken global and some other underlying object. Specifically,
+// a non-addr-taken global is known to not be escaped from any function. It is
+// also incorrect for a transformation to introduce an escape of a global in
+// a way that is observable when it was not there previously. One function
+// being transformed to introduce an escape which could possibly be observed
+// (via loading from a global or the return value for example) within another
+// function is never safe. If the observation is made through non-atomic
+// operations on different threads, it is a data-race and UB. If the
+// observation is well defined, by being observed the transformation would have
+// changed program behavior by introducing the observed escape, making it an
+// invalid transform.
+//
+// This property does require that transformations which *temporarily* escape
+// a global that was not previously escaped, prior to restoring it, must not
+// rely on GMR::alias results. This seems a reasonable restriction, although
+// currently there is no way to enforce it. There is also no realistic
+// optimization pass that would make this mistake. The closest example is
+// a transformation pass which does reg2mem of SSA values but stores them into
+// global variables temporarily before restoring the global variable's value.
+// This could be useful to expose "benign" races for example. However, it seems
+// reasonable to require that a pass which introduces escapes of global
+// variables in this way either not trust AA results while the escape is
+// active, or be forced to operate as a module pass that cannot co-exist
+// with an alias analysis such as GMR.
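+//
+// As a hedged illustration (hypothetical C): given `static int G;` whose
+// address never escapes, a pointer received as a function argument cannot
+// point into G, because that would have required G's address to escape in
+// the first place.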
+bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
+ const Value *V) {
+ // In order to know that the underlying object cannot alias the
+ // non-addr-taken global, we must know that it would have to be an escape.
+ // Thus if the underlying object is a function argument, a load from
+ // a global, or the return of a function, it cannot alias. We can also
+ // recurse through PHI nodes and select nodes provided all of their inputs
+ // resolve to one of these known-escaping roots.
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Inputs;
+ Visited.insert(V);
+ Inputs.push_back(V);
+ int Depth = 0;
+ do {
+ const Value *Input = Inputs.pop_back_val();
+
+ if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
+ // If one input is the very global we're querying against, then we can't
+ // conclude no-alias.
+ if (InputGV == GV)
+ return false;
+
+ // Distinct GlobalVariables never alias, unless overridden or zero-sized.
+ // FIXME: The condition can be refined, but be conservative for now.
+ auto *GVar = dyn_cast<GlobalVariable>(GV);
+ auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
+ if (GVar && InputGVar &&
+ !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
+ !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) {
+ Type *GVType = GVar->getInitializer()->getType();
+ Type *InputGVType = InputGVar->getInitializer()->getType();
+ if (GVType->isSized() && InputGVType->isSized() &&
+ (DL.getTypeAllocSize(GVType) > 0) &&
+ (DL.getTypeAllocSize(InputGVType) > 0))
+ continue;
+ }
+
+ // Conservatively return false, even though we could be smarter
+ // (e.g. look through GlobalAliases).
+ return false;
+ }
+
+ if (isa<Argument>(Input) || isa<CallInst>(Input) ||
+ isa<InvokeInst>(Input)) {
+ // Arguments to functions or returns from functions are inherently
+ // escaping, so we can immediately classify those as not aliasing any
+ // non-addr-taken globals.
+ continue;
+ }
+
+ // Recurse through a limited number of selects, loads and PHIs. This is an
+ // arbitrary depth of 4; lower numbers could be used to fix compile-time
+ // issues if needed, but this is generally expected to only be important
+ // for small depths.
+ if (++Depth > 4)
+ return false;
+
+ if (auto *LI = dyn_cast<LoadInst>(Input)) {
+ // A pointer loaded from a global would have been captured, and we know
+ // that the global is non-escaping, so no alias.
+ const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+ if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
+ // The load does not alias with GV.
+ continue;
+ // Otherwise, a load could come from anywhere, so bail.
+ return false;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(Input)) {
+ const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+ const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+ if (Visited.insert(LHS).second)
+ Inputs.push_back(LHS);
+ if (Visited.insert(RHS).second)
+ Inputs.push_back(RHS);
+ continue;
+ }
+ if (auto *PN = dyn_cast<PHINode>(Input)) {
+ for (const Value *Op : PN->incoming_values()) {
+ Op = GetUnderlyingObject(Op, DL);
+ if (Visited.insert(Op).second)
+ Inputs.push_back(Op);
+ }
+ continue;
+ }
+
+ // FIXME: It would be good to handle other obvious no-alias cases here, but
+ // it isn't clear how to do so reasonably without building a small version
+ // of BasicAA into this code. We could recurse into AAResultBase::alias
+ // here but that seems likely to go poorly as we're inside the
+ // implementation of such a query. Until then, just conservatively return
+ // false.
+ return false;
+ } while (!Inputs.empty());
+
+ // If all the inputs to V were definitively no-alias, then V is no-alias.
+ return true;
+}
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
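+///
+/// For illustration (a sketch, not from this file): if @A and @B are two
+/// internal globals in NonAddressTakenGlobals, an access based on @A and an
+/// access based on @B can be answered NoAlias without inspecting the
+/// accesses themselves.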
+AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ // Get the base object these pointers point to.
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+
+ // If either of the underlying values is a global, they may be non-addr-taken
+ // globals, which we can answer queries about.
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ if (GV1 || GV2) {
+ // If the global's address is taken, pretend we don't know it's a pointer to
+ // the global.
+ if (GV1 && !NonAddressTakenGlobals.count(GV1))
+ GV1 = nullptr;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2))
+ GV2 = nullptr;
+
+ // If the two pointers are derived from two different non-addr-taken
+ // globals we know these can't alias.
+ if (GV1 && GV2 && GV1 != GV2)
+ return NoAlias;
+
+ // If one is and the other isn't, it isn't strictly safe but we can fake
+ // this result if necessary for performance. This does not appear to be
+ // a common problem in practice.
+ if (EnableUnsafeGlobalsModRefAliasResults)
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ // Check for a special case where a non-escaping global can be used to
+ // conclude no-alias.
+ if ((GV1 || GV2) && GV1 != GV2) {
+ const GlobalValue *GV = GV1 ? GV1 : GV2;
+ const Value *UV = GV1 ? UV2 : UV1;
+ if (isNonEscapingGlobalNoAlias(GV, UV))
+ return NoAlias;
+ }
+
+ // Otherwise if they are both derived from the same addr-taken global, we
+ // can't know the two accesses don't overlap.
+ }
+
+ // These pointers may be based on the memory owned by an indirect global. If
+ // so, we may be able to handle this. First check to see if the base pointer
+ // is a direct load from an indirect global.
+ GV1 = GV2 = nullptr;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV1 = GV;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (IndirectGlobals.count(GV))
+ GV2 = GV;
+
+ // These pointers may also be from an allocation for the indirect global. If
+ // so, also handle them.
+ if (!GV1)
+ GV1 = AllocsForIndirectGlobals.lookup(UV1);
+ if (!GV2)
+ GV2 = AllocsForIndirectGlobals.lookup(UV2);
+
+ // Now that we know whether the two pointers are related to indirect globals,
+ // use this to disambiguate the pointers. If the pointers are based on
+ // different indirect globals they cannot alias.
+ if (GV1 && GV2 && GV1 != GV2)
+ return NoAlias;
+
+ // If one is based on an indirect global and the other isn't, it isn't
+ // strictly safe but we can fake this result if necessary for performance.
+ // This does not appear to be a common problem in practice.
+ if (EnableUnsafeGlobalsModRefAliasResults)
+ if ((GV1 || GV2) && GV1 != GV2)
+ return NoAlias;
+
+ return AAResultBase::alias(LocA, LocB);
+}
+
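+/// getModRefInfoForArgument - Compute a conservative mod/ref bound for CS
+/// with respect to GV based solely on the call's argument list: if no
+/// argument can be based on GV, the call cannot touch GV through its
+/// arguments.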
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+ const GlobalValue *GV) {
+ if (CS.doesNotAccessMemory())
+ return MRI_NoModRef;
+ ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+ // Iterate through all the arguments to the called function. If any argument
+ // is based on GV, return the conservative result.
+ for (auto &A : CS.args()) {
+ SmallVector<Value*, 4> Objects;
+ GetUnderlyingObjects(A, Objects, DL);
+
+ // All objects must be identified.
+ if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject))
+ return ConservativeResult;
+
+ if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+ return ConservativeResult;
+ }
+
+ // We identified all objects in the argument list, and none of them were GV.
+ return MRI_NoModRef;
+}
+
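+/// getModRefInfo - Answer a mod/ref query for a call site against Loc,
+/// refining the generic result when Loc is based on a tracked
+/// non-address-taken global and the callee is a direct call we have
+/// information for.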
+ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ unsigned Known = MRI_ModRef;
+
+ // If we are asking for mod/ref info of a direct call with a pointer to a
+ // global we are tracking, return information if we have it.
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
+ if (GV->hasLocalLinkage())
+ if (const Function *F = CS.getCalledFunction())
+ if (NonAddressTakenGlobals.count(GV))
+ if (const FunctionInfo *FI = getFunctionInfo(F))
+ Known = FI->getModRefInfoForGlobal(*GV) |
+ getModRefInfoForArgument(CS, GV);
+
+ if (Known == MRI_NoModRef)
+ return MRI_NoModRef; // No need to query other mod/ref analyses
+ return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc));
+}
+
+GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
+ const TargetLibraryInfo &TLI)
+ : AAResultBase(TLI), DL(DL) {}
+
+GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL),
+ NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
+ IndirectGlobals(std::move(Arg.IndirectGlobals)),
+ AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
+ FunctionInfos(std::move(Arg.FunctionInfos)),
+ Handles(std::move(Arg.Handles)) {
+ // Update the parent for each DeletionCallbackHandle.
+ for (auto &H : Handles) {
+ assert(H.GAR == &Arg);
+ H.GAR = this;
+ }
+}
+
+/*static*/ GlobalsAAResult
+GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
+ CallGraph &CG) {
+ GlobalsAAResult Result(M.getDataLayout(), TLI);
+
+ // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+ Result.CollectSCCMembership(CG);
+
+ // Find non-addr taken globals.
+ Result.AnalyzeGlobals(M);
+
+ // Propagate on CG.
+ Result.AnalyzeCallGraph(CG, M);
+
+ return Result;
+}
+
+GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) {
+ return GlobalsAAResult::analyzeModule(M,
+ AM->getResult<TargetLibraryAnalysis>(M),
+ AM->getResult<CallGraphAnalysis>(M));
+}
+
+char GlobalsAA::PassID;
+
+char GlobalsAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
+ "Globals Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa",
+ "Globals Alias Analysis", false, true)
+
+ModulePass *llvm::createGlobalsAAWrapperPass() {
+ return new GlobalsAAWrapperPass();
+}
+
+GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
+ initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+ Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
+ M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+ return false;
+}
+
+bool GlobalsAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<CallGraphWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
new file mode 100644
index 0000000..e0c5d8f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -0,0 +1,373 @@
+//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "iv-users"
+
+char IVUsers::ID = 0;
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+
+Pass *llvm::createIVUsersPass() {
+ return new IVUsers();
+}
+
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
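+///
+/// For example (typical SCEV notation; a sketch): within loop %L, the affine
+/// addrec {%base,+,4}<%L> is interesting, a loop-invariant expression such
+/// as (4 * %n) is not, and an add is rejected unless exactly one of its
+/// operands is interesting.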
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE, LoopInfo *LI) {
+ // An addrec is interesting if it's affine or if it has an interesting start.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Keep things simple. Don't touch loop-variant strides unless they're
+ // only used outside the loop and we can simplify them.
+ if (AR->getLoop() == L)
+ return AR->isAffine() ||
+ (!L->contains(I) &&
+ SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
+ // Otherwise recurse to see if the start value is interesting, and check
+ // that the step value is not interesting, since we don't yet know how to
+ // do effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE, LI) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
+ }
+
+ // An add is interesting if exactly one of its operands is interesting.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
+ for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+ OI != OE; ++OI)
+ if (isInteresting(*OI, I, L, SE, LI)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
+ }
+
+ // Nothing else is interesting here.
+ return false;
+}
+
+/// Return true if all loop headers that dominate this block are in simplified
+/// form.
+static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
+ const LoopInfo *LI,
+ SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+ Loop *NearestLoop = nullptr;
+ for (DomTreeNode *Rung = DT->getNode(BB);
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *DomBB = Rung->getBlock();
+ Loop *DomLoop = LI->getLoopFor(DomBB);
+ if (DomLoop && DomLoop->getHeader() == DomBB) {
+ // If the domtree walk reaches a loop with no preheader, return false.
+ if (!DomLoop->isLoopSimplifyForm())
+ return false;
+ // If we have already checked this loop nest, stop checking.
+ if (SimpleLoopNests.count(DomLoop))
+ break;
+ // If we have not already checked this loop nest, remember the loop
+ // header nearest to BB. The nearest loop may not contain BB.
+ if (!NearestLoop)
+ NearestLoop = DomLoop;
+ }
+ }
+ if (NearestLoop)
+ SimpleLoopNests.insert(NearestLoop);
+ return true;
+}
+
+/// AddUsersImpl - Inspect the specified instruction. If it is a reducible
+/// SCEV, recursively add its users to the IVUses list and return true.
+/// Otherwise, return false.
+bool IVUsers::AddUsersImpl(Instruction *I,
+ SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+
+ // Add this IV user to the Processed set before returning false to ensure that
+ // all IV users are members of the set. See IVUsers::isIVUserOrOperand.
+ if (!Processed.insert(I).second)
+ return true; // Instruction already handled.
+
+ if (!SE->isSCEVable(I->getType()))
+ return false; // Void and FP expressions cannot be reduced.
+
+ // IVUsers is used by LSR which assumes that all SCEV expressions are safe to
+ // pass to SCEVExpander. Expressions are not safe to expand if they represent
+ // operations that are not safe to speculate, namely integer division.
+ if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I))
+ return false;
+
+ // LSR is not APInt clean; do not touch integers bigger than 64 bits.
+ // Also avoid creating IVs of non-native types. For example, we don't want a
+ // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+ uint64_t Width = SE->getTypeSizeInBits(I->getType());
+ if (Width > 64 || !DL.isLegalInteger(Width))
+ return false;
+
+ // Don't attempt to promote ephemeral values to indvars. They will be removed
+ // later anyway.
+ if (EphValues.count(I))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *ISE = SE->getSCEV(I);
+
+ // If we've come to an uninteresting expression, stop the traversal and
+ // call this a user.
+ if (!isInteresting(ISE, I, L, SE, LI))
+ return false;
+
+ SmallPtrSet<Instruction *, 4> UniqueUsers;
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (!UniqueUsers.insert(User).second)
+ continue;
+
+ // Do not infinitely recurse on PHI nodes.
+ if (isa<PHINode>(User) && Processed.count(User))
+ continue;
+
+ // Only consider IVUsers that are dominated by simplified loop
+ // headers. Otherwise, SCEVExpander will crash.
+ BasicBlock *UseBB = User->getParent();
+ // A phi's use is live out of its predecessor block.
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ unsigned OperandNo = U.getOperandNo();
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ UseBB = PHI->getIncomingBlock(ValNo);
+ }
+ if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
+ return false;
+
+ // Descend recursively, but not into PHI nodes outside the current loop.
+ // It's important to see the entire expression outside the loop to make the
+ // choices that depend on addressing mode correctly, although we won't
+ // consider references outside the loop in all cases.
+ // If User is already in Processed, we don't want to recurse into it again,
+ // but do want to record a second reference in the same instruction.
+ bool AddUserToIVUsers = false;
+ if (LI->getLoopFor(User->getParent()) != L) {
+ if (isa<PHINode>(User) || Processed.count(User) ||
+ !AddUsersImpl(User, SimpleLoopNests)) {
+ DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+ } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
+ DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
+ << " OF SCEV: " << *ISE << '\n');
+ AddUserToIVUsers = true;
+ }
+
+ if (AddUserToIVUsers) {
+ // Okay, we found a user that we cannot reduce.
+ IVStrideUse &NewUse = AddUser(User, I);
+ // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
+ // The regular return value here is discarded; instead of recording
+ // it, we just recompute it when we need it.
+ const SCEV *OriginalISE = ISE;
+ ISE = TransformForPostIncUse(NormalizeAutodetect,
+ ISE, User, I,
+ NewUse.PostIncLoops,
+ *SE, *DT);
+
+ // PostIncNormalization effectively simplifies the expression under
+ // pre-increment assumptions. Those assumptions (no wrapping) might not
+ // hold for the post-inc value. Catch such cases by making sure the
+ // transformation is invertible.
+ if (OriginalISE != ISE) {
+ const SCEV *DenormalizedISE =
+ TransformForPostIncUse(Denormalize, ISE, User, I,
+ NewUse.PostIncLoops, *SE, *DT);
+
+ // If we normalized the expression, but denormalization doesn't give the
+ // original one, discard this user.
+ if (OriginalISE != DenormalizedISE) {
+ DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+ << *ISE << '\n');
+ IVUses.pop_back();
+ return false;
+ }
+ }
+ DEBUG(if (SE->getSCEV(I) != ISE)
+ dbgs() << " NORMALIZED TO: " << *ISE << '\n');
+ }
+ }
+ return true;
+}
+
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ // SCEVExpander can only handle users that are dominated by simplified loop
+ // entries. Keep track of all loops that are only dominated by other simple
+ // loops so we don't traverse the domtree for each user.
+ SmallPtrSet<Loop*,16> SimpleLoopNests;
+
+ return AddUsersImpl(I, SimpleLoopNests);
+}
+
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
+ return IVUses.back();
+}
+
+IVUsers::IVUsers()
+ : LoopPass(ID) {
+ initializeIVUsersPass(*PassRegistry::getPassRegistry());
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+
+ L = l;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ // Collect ephemeral values so that AddUsersIfInteresting skips them.
+ EphValues.clear();
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ // Find all uses of induction variables in this loop, and categorize
+ // them by stride. Start by finding all of the PHI nodes in the header for
+ // this loop. If they are induction variables, inspect their uses.
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+ (void)AddUsersIfInteresting(&*I);
+
+ return false;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+ OS << "IV Users for loop ";
+ L->getHeader()->printAsOperand(OS, false);
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << " with backedge-taken count "
+ << *SE->getBackedgeTakenCount(L);
+ }
+ OS << ":\n";
+
+ for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+ E = IVUses.end(); UI != E; ++UI) {
+ OS << " ";
+ UI->getOperandValToReplace()->printAsOperand(OS, false);
+ OS << " = " << *getReplacementExpr(*UI);
+ for (PostIncLoopSet::const_iterator
+ I = UI->PostIncLoops.begin(),
+ E = UI->PostIncLoops.end(); I != E; ++I) {
+ OS << " (post-inc with loop ";
+ (*I)->getHeader()->printAsOperand(OS, false);
+ OS << ")";
+ }
+ OS << " in ";
+ if (UI->getUser())
+ UI->getUser()->print(OS);
+ else
+ OS << "Printing <null> User";
+ OS << '\n';
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void IVUsers::dump() const {
+ print(dbgs());
+}
+#endif
+
+void IVUsers::releaseMemory() {
+ Processed.clear();
+ IVUses.clear();
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+ return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+ return
+ TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+ IU.getUser(), IU.getOperandValToReplace(),
+ const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+ *SE, *DT);
+}
+
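+/// findAddRecForLoop - Search S for an addrec on loop L, looking through the
+/// start values of other addrecs and through the operands of adds.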
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (AR->getLoop() == L)
+ return AR;
+ return findAddRecForLoop(AR->getStart(), L);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ return AR;
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ return AR->getStepRecurrence(*SE);
+ return nullptr;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+ PostIncLoops.insert(L);
+}
+
+void IVStrideUse::deleted() {
+ // Remove this user from the list.
+ Parent->Processed.erase(this->getUser());
+ Parent->IVUses.erase(this);
+ // this now dangles!
+}
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
new file mode 100644
index 0000000..a86a703
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -0,0 +1,1425 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inline-cost"
+
+STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ /// The TargetTransformInfo available for this compilation.
+ const TargetTransformInfo &TTI;
+
+ /// The cache of @llvm.assume intrinsics.
+ AssumptionCacheTracker *ACT;
+
+ // The called function.
+ Function &F;
+
+ // The candidate callsite being analyzed. Please do not use this to do
+ // analysis in the caller function; we want the inline cost query to be
+ // easily cacheable. Instead, use the cover function paramHasAttr.
+ CallSite CandidateCS;
+
+ int Threshold;
+ int Cost;
+
+ bool IsCallerRecursive;
+ bool IsRecursiveCall;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
+ bool HasFrameEscape;
+
+ /// Number of bytes allocated statically by the callee.
+ uint64_t AllocatedSize;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+ // likely simplifications post-inlining. The most important aspect we track
+ // is CFG altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ bool simplifyCallSite(Function *F, CallSite CS);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ /// Return true if the given argument to the function being considered for
+ /// inlining has the given attribute set either at the call site or the
+ /// function declaration. Primarily used to inspect call site specific
+ /// attributes since these can be more precise than the ones on the callee
+ /// itself.
+ bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
+
+ /// Return true if the given value is known non null within the callee if
+ /// inlined through this particular callsite.
+ bool isKnownNonNullInCallee(Value *V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitCmpInst(CmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitExtractValue(ExtractValueInst &I);
+ bool visitInsertValue(InsertValueInst &I);
+ bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitCleanupReturnInst(CleanupReturnInst &RI);
+ bool visitCatchReturnInst(CatchReturnInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
+
+public:
+ CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
+ Function &Callee, int Threshold, CallSite CSArg)
+ : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
+ Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
+
+ bool analyzeCall(CallSite CS);
+
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
+
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
+
+ void dump();
+};
+
+} // namespace
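+
+// A hedged usage sketch (names approximate; the inline-cost driver later in
+// this file invokes the analyzer roughly like this):
+//
+//   CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
+//   if (CA.analyzeCall(CS))
+//     ...; // cost stayed within the threshold; see CA.getCost().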
+
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
+}
+
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
+
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
+
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
+}
+
+/// \brief Disable SROA for the candidate marked by this cost iterator.
+///
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
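+///
+/// Worked example (assuming 64-bit pointers and natural struct layout): for
+/// a GEP on a {i32, i32}* with indices (i32 1, i32 1), the array index
+/// contributes 1 * 8 bytes and the struct index adds field offset 4, for an
+/// accumulated offset of 12 bytes.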
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
+ return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+ }
+ return true;
+}
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
+ if (I.isArrayAllocation()) {
+ if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
+ ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
+ assert(AllocSize && "Allocation size not a constant int?");
+ Type *Ty = I.getAllocatedType();
+ AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
+ return Base::visitAlloca(I);
+ }
+ }
+
+ // Accumulate the allocated size.
+ if (I.isStaticAlloca()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ Type *Ty = I.getAllocatedType();
+ AllocatedSize += DL.getTypeAllocSize(Ty);
+ }
+
+ // We will happily inline static alloca instructions.
+ if (I.isStaticAlloca())
+ return Base::visitAlloca(I);
+
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
+}
+
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
+
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+ // though we don't want to propagate its bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
+
+ // Phi nodes are always zero-cost.
+ return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded, and disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+ }
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also handle SROA candidates here; we already know that the GEP is
+ // all-constant indexed.
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ return true;
+ }
+ }
+
+ if (isGEPOffsetConstant(I)) {
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ // Constant GEPs are modeled as free.
+ return true;
+ }
+
+ // Variable GEPs will require math and will disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+}
+
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+ // Propagate constants through bitcasts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offsets through casts.
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ // Casts don't change the offset, just wrap it up.
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also look for SROA candidates here.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // Bitcasts are always zero cost.
+ return true;
+}
+
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+ // Propagate constants through ptrtoint.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when converted to a plain integer, provided the
+ // integer is large enough to represent the pointer.
+ unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize >= DL.getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // This is really weird. Technically, ptrtoint will disable SROA. However,
+ // unless that ptrtoint is *used* somewhere in the live basic blocks after
+ // inlining, it will be nuked, and SROA should proceed. All of the uses which
+ // would block SROA would also block SROA if applied directly to a pointer,
+ // and so we can just add the integer in here. The only places where SROA is
+ // preserved either cannot fire on an integer, or won't in and of themselves
+ // disable SROA (ext) without some later use that we would see and disable.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+ // Propagate constants through inttoptr.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when round-tripped through an integer without
+ // modifications, provided the integer is not too large.
+ Value *Op = I.getOperand(0);
+ unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize <= DL.getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+ // Propagate constants through casts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ disableSROA(I.getOperand(0));
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+ Value *Operand = I.getOperand(0);
+ Constant *COp = dyn_cast<Constant>(Operand);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Operand);
+ if (COp) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+ COp, DL)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+
+ // Disable any SROA on the argument to arbitrary unary operators.
+ disableSROA(Operand);
+
+ return false;
+}
+
+bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
+ unsigned ArgNo = A->getArgNo();
+ return CandidateCS.paramHasAttr(ArgNo+1, Attr);
+}
+
+bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
+ // Does the *call site* have the NonNull attribute set on an argument? We
+ // use the attribute on the call site to memoize any analysis done in the
+ // caller. This will also trip if the callee function has a non-null
+ // parameter attribute, but that's a less interesting case because hopefully
+ // the callee would already have been simplified based on that.
+ if (Argument *A = dyn_cast<Argument>(V))
+ if (paramHasAttr(A, Attribute::NonNull))
+ return true;
+
+ // Is this an alloca in the caller? This is distinct from the attribute case
+ // above because attributes aren't updated within the inliner itself and we
+ // always want to catch the alloca derived case.
+ if (isAllocaDerivedArg(V))
+ // We can actually predict the result of comparisons between an
+ // alloca-derived value and null. Note that this fires regardless of
+ // SROA firing.
+ return true;
+
+ return false;
+}
+
+bool CallAnalyzer::visitCmpInst(CmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+
+ if (I.getOpcode() == Instruction::FCmp)
+ return false;
+
+ // Otherwise look for a comparison between constant offset pointers with
+ // a common base.
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the icmp to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrCmps;
+ return true;
+ }
+ }
+ }
+
+ // If the comparison is an equality comparison with null, we can simplify it
+ // if we know the value (argument) can't be null.
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
+ isKnownNonNullInCallee(I.getOperand(0))) {
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
+ // Finally check for SROA candidates in comparisons.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (isa<ConstantPointerNull>(I.getOperand(1))) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
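+
+// Illustrative sketch of the common-base fold above (hypothetical IR; %base,
+// %p1 and %p2 are made-up names):
+//   %p1 = getelementptr inbounds i32, i32* %base, i64 1
+//   %p2 = getelementptr inbounds i32, i32* %base, i64 3
+//   %c  = icmp ult i32* %p1, %p2
+// Assuming both GEPs were tracked in ConstantOffsetPtrs, the icmp folds to a
+// comparison of the byte offsets 4 and 12, i.e. 'true'.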
+
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+ // Try to handle a special case: we can fold computing the difference of two
+ // constant-related pointers.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the subtract to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrDiffs;
+ return true;
+ }
+ }
+ }
+
+ // Otherwise, fall back to the generic logic for simplifying and handling
+ // instructions.
+ return Base::visitSub(I);
+}
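+
+// Sketch of the pointer-difference fold above (hypothetical IR; assumes a
+// 64-bit pointer size):
+//   %i1 = ptrtoint i32* %p1 to i64   ; %p1 is %base plus 12 bytes
+//   %i2 = ptrtoint i32* %p2 to i64   ; %p2 is %base plus 4 bytes
+//   %d  = sub i64 %i1, %i2
+// With both operands tracked in ConstantOffsetPtrs against the common base,
+// %d folds to the constant 8 and NumConstantPtrDiffs is bumped.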
+
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV =
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+ disableSROA(LHS);
+ disableSROA(RHS);
+
+ return false;
+}
+
+bool CallAnalyzer::visitLoad(LoadInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitStore(StoreInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
+ // Constant folding for extract value is trivial.
+ Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
+ if (!C)
+ C = SimplifiedValues.lookup(I.getAggregateOperand());
+ if (C) {
+ SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
+ return true;
+ }
+
+ // SROA can look through these but give them a cost.
+ return false;
+}
+
+bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
+ // Constant folding for insert value is trivial.
+ Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
+ if (!AggC)
+ AggC = SimplifiedValues.lookup(I.getAggregateOperand());
+ Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
+ if (!InsertedC)
+ InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
+ if (AggC && InsertedC) {
+ SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
+ I.getIndices());
+ return true;
+ }
+
+ // SROA can look through these but give them a cost.
+ return false;
+}
+
+/// \brief Try to simplify a call site.
+///
+/// Takes a concrete function and callsite and tries to actually simplify it by
+/// analyzing the arguments and the call itself with instsimplify. Returns
+/// true if it has simplified the callsite to some other entity (a constant),
+/// making it free.
+bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
+ // FIXME: Using the instsimplify logic directly for this is inefficient
+ // because we have to continually rebuild the argument list even when no
+ // simplifications can be performed. Until that is fixed with remapping
+ // inside of instsimplify, directly constant fold calls here.
+ if (!canConstantFoldCallTo(F))
+ return false;
+
+ // Try to re-map the arguments to constants.
+ SmallVector<Constant *, 4> ConstantArgs;
+ ConstantArgs.reserve(CS.arg_size());
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ Constant *C = dyn_cast<Constant>(*I);
+ if (!C)
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
+ if (!C)
+ return false; // This argument doesn't map to a constant.
+
+ ConstantArgs.push_back(C);
+ }
+ if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
+ SimplifiedValues[CS.getInstruction()] = C;
+ return true;
+ }
+
+ return false;
+}
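+
+// For example (illustrative): a call whose arguments are all constants, or
+// simplify to constants, such as
+//   %r = call i32 @llvm.ctpop.i32(i32 255)
+// is constant folded here to i32 8, recorded in SimplifiedValues, and thus
+// treated as free.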
+
+bool CallAnalyzer::visitCallSite(CallSite CS) {
+ if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
+ !F.hasFnAttribute(Attribute::ReturnsTwice)) {
+ // This aborts the entire analysis.
+ ExposesReturnsTwice = true;
+ return false;
+ }
+ if (CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->cannotDuplicate())
+ ContainsNoDuplicateCall = true;
+
+ if (Function *F = CS.getCalledFunction()) {
+ // When we have a concrete function, first try to simplify it directly.
+ if (simplifyCallSite(F, CS))
+ return true;
+
+ // Next check if it is an intrinsic we know about.
+ // FIXME: Lift this into part of the InstVisitor.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return Base::visitCallSite(CS);
+
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ // SROA can usually chew through these intrinsics, but they aren't free.
+ return false;
+ case Intrinsic::localescape:
+ HasFrameEscape = true;
+ return false;
+ }
+ }
+
+ if (F == CS.getInstruction()->getParent()->getParent()) {
+ // This flag will fully abort the analysis, so don't bother with anything
+ // else.
+ IsRecursiveCall = true;
+ return false;
+ }
+
+ if (TTI.isLoweredToCall(F)) {
+ // We account for the average 1 instruction per call argument setup
+ // here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Everything other than inline ASM will also have a significant cost
+ // merely from making the call.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ Cost += InlineConstants::CallPenalty;
+ }
+
+ return Base::visitCallSite(CS);
+ }
+
+ // Otherwise we're in a very special case -- an indirect function call. See
+ // if we can be particularly clever about this.
+ Value *Callee = CS.getCalledValue();
+
+ // First, pay the price of the argument setup. We account for the average
+ // 1 instruction per call argument setup here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Next, check if this happens to be an indirect function call to a known
+ // function in this inline context. If not, we've done all we can.
+ Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+ if (!F)
+ return Base::visitCallSite(CS);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan
+ // out. Pretend to inline the function, with a custom threshold.
+ CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
+ if (CA.analyzeCall(CS)) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // threshold to get the bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+ }
+
+ return Base::visitCallSite(CS);
+}
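+
+// Worked example of the devirtualization bonus (numbers illustrative):
+// suppose the nested analysis of the discovered callee finishes with a cost
+// of 60 against an indirect-call threshold of 100. The bonus is
+// max(0, 100 - 60) = 40, which is subtracted from this call site's running
+// Cost before falling through to the generic call handling.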
+
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model unconditional switches as free, see the comments on handling
+ // branches.
+ if (isa<ConstantInt>(SI.getCondition()))
+ return true;
+ if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Otherwise, we need to accumulate a cost proportional to the number of
+ // distinct successor blocks. This fan-out in the CFG cannot be represented
+ // for free even if we can represent the core switch as a jumptable that
+ // takes a single instruction.
+ //
+ // NB: We convert large switches which are just used to initialize large phi
+ // nodes into lookup tables in simplify-cfg, so this shouldn't prevent
+ // inlining those. It will still prevent inlining in cases where that
+ // optimization does not (yet) fire.
+ SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
+ SuccessorBlocks.insert(SI.getDefaultDest());
+ for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
+ SuccessorBlocks.insert(I.getCaseSuccessor());
+ // Add cost corresponding to the number of distinct destinations. The first
+ // one is modeled as free because of fallthrough.
+ Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+ return false;
+}
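+
+// Cost sketch (illustrative): a switch with a default and four cases that
+// together target three distinct blocks accumulates
+//   (3 - 1) * InlineConstants::InstrCost
+// since the first destination is modeled as fallthrough.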
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // them would be incorrect because all the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function, and
+ // the indirect branch would jump from the inlined copy of the function into
+ // the original function, which is undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a cleanupret instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a catchret instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+ // FIXME: It might be reasonable to discount the cost of instructions leading
+ // to unreachable as they have the lowest possible impact on both runtime and
+ // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+ // Some instructions are free. All of the free intrinsics can also be
+ // handled by SROA, etc.
+ if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
+ return true;
+
+ // We found something we don't understand or can't handle. Mark any SROA-able
+ // values in the operand list as no longer viable.
+ for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+ disableSROA(*OI);
+
+ return false;
+}
+
+
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible-to-inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // FIXME: Currently, the number of instructions in a function, regardless of
+ // our ability to simplify them to constants or dead code during inlining,
+ // is used by the vector bonus heuristic. As long as that's true, we have to
+ // special case debug intrinsics here to prevent differences in inlining due
+ // to debug symbols. Eventually, the number of unsimplified instructions
+ // shouldn't factor into the cost computation, but until then, hack around
+ // it here.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Skip ephemeral values.
+ if (EphValues.count(&*I))
+ continue;
+
+ ++NumInstructions;
+ if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+ ++NumVectorInstructions;
+
+ // If the instruction is floating point, and the target says this operation
+ // is expensive or the function has the "use-soft-float" attribute, this may
+ // eventually become a library call. Treat the cost as such.
+ if (I->getType()->isFloatingPointTy()) {
+ bool hasSoftFloatAttr = false;
+
+ // If the function has the "use-soft-float" attribute, mark it as
+ // expensive.
+ if (F.hasFnAttribute("use-soft-float")) {
+ Attribute Attr = F.getFnAttribute("use-soft-float");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ hasSoftFloatAttr = true;
+ }
+
+ if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+ hasSoftFloatAttr)
+ Cost += InlineConstants::CallPenalty;
+ }
+
+ // If the instruction simplified to a constant, there is no cost to this
+ // instruction. Visit the instructions using our InstVisitor to account for
+ // all of the per-instruction logic. The visit tree returns true if we
+ // consumed the instruction in any way, and false if the instruction's base
+ // cost should count against inlining.
+ if (Base::visit(&*I))
+ ++NumInstructionsSimplified;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // If visiting this instruction detected an uninlinable pattern, abort.
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr || HasFrameEscape)
+ return false;
+
+ // If the caller is a recursive function then we don't want to inline
+ // functions which allocate a lot of stack space because it would increase
+ // the caller stack usage dramatically.
+ if (IsCallerRecursive &&
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ return false;
+
+ // Check if we've passed the maximum possible threshold so we don't spin in
+ // huge basic blocks that will never inline.
+ if (Cost > Threshold)
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving just the base pointer,
+/// and accumulates the total constant offset applied in the returned constant.
+/// It returns null if V is not a pointer, and returns the constant '0' if
+/// there are no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+ if (!V->getType()->isPointerTy())
+ return nullptr;
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+ return nullptr;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V).second);
+
+ Type *IntPtrTy = DL.getIntPtrType(V->getContext());
+ return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
+}
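+
+// For instance (hypothetical IR), given
+//   %a = getelementptr inbounds [4 x i32], [4 x i32]* %buf, i64 0, i64 2
+//   %b = bitcast i32* %a to i8*
+// calling this on %b walks back through the bitcast and the inbounds GEP,
+// leaving V = %buf and returning the accumulated byte offset 8.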
+
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+ ++NumCallsAnalyzed;
+
+ // Perform some tweaks to the cost and threshold based on the direct
+ // callsite information.
+
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low. Note that these bonuses are somewhat arbitrary and evolved over time
+ // by accident as much as because they are principled bonuses.
+ //
+ // FIXME: It would be nice to remove all such bonuses. At least it would be
+ // nice to base the bonus values on something more scientific.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+ FiftyPercentVectorBonus = 3 * Threshold / 2;
+ TenPercentVectorBonus = 3 * Threshold / 4;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ // Track whether the post-inlining function would have more than one basic
+ // block. A function consisting of a single basic block is often intended to
+ // be inlined. Balloon the threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+
+ // Speculatively apply all possible bonuses to Threshold. If cost exceeds
+ // this Threshold any time, and cost cannot decrease, we can stop processing
+ // the rest of the function body.
+ Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
+
+ // Give out bonuses per argument, as the instructions setting them up will
+ // be gone after inlining.
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+ if (CS.isByValArgument(I)) {
+ // We approximate the number of loads and stores needed by dividing the
+ // size of the byval type by the target's pointer size.
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = DL.getPointerSizeInBits();
+ // Ceiling division.
+ unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
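+ // For instance (illustrative numbers): a 12-byte byval struct with
+ // 64-bit pointers gives TypeSize = 96 and PointerSize = 64, so
+ // NumStores = (96 + 63) / 64 = 2.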
+
+ // If it generates more than 8 stores it is likely to be expanded as an
+ // inline memcpy so we take that as an upper bound. Otherwise we assume
+ // one load and one store per word copied.
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used
+ // here instead of a magic number of 8, but it's not available via
+ // DataLayout.
+ NumStores = std::min(NumStores, 8U);
+
+ Cost -= 2 * NumStores * InlineConstants::InstrCost;
+ } else {
+ // For non-byval arguments subtract off one instruction per call
+ // argument.
+ Cost -= InlineConstants::InstrCost;
+ }
+ }
+
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically.
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ &F == CS.getCalledFunction();
+ if (OnlyOneCallAndLocalLinkage)
+ Cost += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this unless there is literally zero
+ // cost.
+ Instruction *Instr = CS.getInstruction();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Threshold = 0;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+ Threshold = 0;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
+
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost > Threshold)
+ return false;
+
+ if (F.empty())
+ return true;
+
+ Function *Caller = CS.getInstruction()->getParent()->getParent();
+ // Check if the caller function is recursive itself.
+ for (User *U : Caller->users()) {
+ CallSite Site(U);
+ if (!Site)
+ continue;
+ Instruction *I = Site.getInstruction();
+ if (I->getParent()->getParent() == Caller) {
+ IsCallerRecursive = true;
+ break;
+ }
+ }
+
+ // Populate our simplified values by mapping from function arguments to call
+ // arguments with known important simplifications.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+ FAI != FAE; ++FAI, ++CAI) {
+ assert(CAI != CS.arg_end());
+ if (Constant *C = dyn_cast<Constant>(CAI))
+ SimplifiedValues[&*FAI] = C;
+
+ Value *PtrArg = *CAI;
+ if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+ ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
+
+ // We can SROA any pointer arguments derived from alloca instructions.
+ if (isa<AllocaInst>(PtrArg)) {
+ SROAArgValues[&*FAI] = PtrArg;
+ SROAArgCosts[PtrArg] = 0;
+ }
+ }
+ }
+ NumConstantArgs = SimplifiedValues.size();
+ NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+ NumAllocaArgs = SROAArgValues.size();
+
+ // FIXME: If a caller has multiple calls to a callee, we end up recomputing
+ // the ephemeral values multiple times (and they're completely determined by
+ // the callee, so this is purely duplicate work).
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
+
+ // The worklist of live basic blocks in the callee *after* inlining. We avoid
+ // adding basic blocks of the callee which can be proven to be dead for this
+ // particular call site in order to get more accurate cost estimates. This
+ // requires a somewhat heavyweight iteration pattern: we need to walk the
+ // basic blocks in a breadth-first order as we insert live successors. To
+ // accomplish this we use a small-size optimized SetVector; we expect the
+ // worklist to stay small because we exit as soon as the cost crosses the
+ // threshold.
+ typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+ SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+ BBSetVector BBWorklist;
+ BBWorklist.insert(&F.getEntryBlock());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ // Bail out the moment we cross the threshold. This means we'll under-count
+ // the cost, but only when undercounting doesn't matter.
+ if (Cost > Threshold)
+ break;
+
+ BasicBlock *BB = BBWorklist[Idx];
+ if (BB->empty())
+ continue;
+
+ // Disallow inlining a blockaddress. A blockaddress only has defined
+ // behavior for an indirect branch in the same function, and we do not
+ // currently support inlining indirect branches. But, the inliner may not
+ // see an indirect branch that ends up being dead code at a particular call
+ // site. If the blockaddress escapes the function, e.g., via a global
+ // variable, inlining may lead to an invalid cross-function reference.
+ if (BB->hasAddressTaken())
+ return false;
+
+ // Analyze the cost of this block. If we blow through the threshold, this
+ // returns false, and we can bail out.
+ if (!analyzeBlock(BB, EphValues)) {
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr || HasFrameEscape)
+ return false;
+
+ // If the caller is a recursive function then we don't want to inline
+ // functions which allocate a lot of stack space because it would increase
+ // the caller stack usage dramatically.
+ if (IsCallerRecursive &&
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ return false;
+
+ break;
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+
+ // Add in the live successors by first checking whether we have a terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ Value *Cond = BI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *Cond = SI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ continue;
+ }
+ }
+
+ // If we're unable to select a particular successor, just count all of
+ // them.
+ for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+ ++TIdx)
+ BBWorklist.insert(TI->getSuccessor(TIdx));
+
+ // If we had any successors at this point, then post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
+ }
+ }
+
+ // If this is a noduplicate call, we can still inline as long as
+ // inlining this would cause the removal of the caller (so the instruction
+ // is not actually duplicated, just moved).
+ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
+ return false;
+
+ // We applied the maximum possible vector bonus at the beginning. Now,
+ // subtract the excess bonus, if any, from the Threshold before
+ // comparing against Cost.
+ if (NumVectorInstructions <= NumInstructions / 10)
+ Threshold -= FiftyPercentVectorBonus;
+ else if (NumVectorInstructions <= NumInstructions / 2)
+ Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
+
+ return Cost <= std::max(0, Threshold);
+}
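+
+// Bonus arithmetic sketch (assuming an illustrative base threshold of 225):
+// FiftyPercentVectorBonus = 337, TenPercentVectorBonus = 168 and
+// SingleBBBonus = 112, so the walk starts against a speculative threshold of
+// 225 + 112 + 337 = 674; any unearned slice of the vector bonus is clawed
+// back above before the final "Cost <= max(0, Threshold)" check.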
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() {
+#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
+ DEBUG_PRINT_STAT(NumConstantArgs);
+ DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
+ DEBUG_PRINT_STAT(NumAllocaArgs);
+ DEBUG_PRINT_STAT(NumConstantPtrCmps);
+ DEBUG_PRINT_STAT(NumConstantPtrDiffs);
+ DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(NumInstructions);
+ DEBUG_PRINT_STAT(SROACostSavings);
+ DEBUG_PRINT_STAT(SROACostSavingsLost);
+ DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
+ DEBUG_PRINT_STAT(Cost);
+ DEBUG_PRINT_STAT(Threshold);
+#undef DEBUG_PRINT_STAT
+}
+#endif
+
+/// \brief Test whether two functions either both have or both lack the given
+/// attribute.
+template<typename AttrKind>
+static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
+ return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
+}
+
+/// \brief Test that there are no attribute conflicts between Caller and Callee
+/// that prevent inlining.
+static bool functionsHaveCompatibleAttributes(Function *Caller,
+ Function *Callee,
+ TargetTransformInfo &TTI) {
+ return TTI.areInlineCompatible(Caller, Callee) &&
+ AttributeFuncs::areInlineCompatible(*Caller, *Callee);
+}
+
+InlineCost llvm::getInlineCost(CallSite CS, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);
+}
+
+InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold,
+ TargetTransformInfo &CalleeTTI,
+ AssumptionCacheTracker *ACT) {
+ // Cannot inline indirect calls.
+ if (!Callee)
+ return llvm::InlineCost::getNever();
+
+ // Calls to functions with always-inline attributes should be inlined
+ // whenever possible.
+ if (CS.hasFnAttr(Attribute::AlwaysInline)) {
+ if (isInlineViable(*Callee))
+ return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getNever();
+ }
+
+ // Never inline functions with conflicting attributes (unless callee has
+ // always-inline attribute).
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
+ return llvm::InlineCost::getNever();
+
+ // Don't inline this call if the caller has the optnone attribute.
+ if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
+ return llvm::InlineCost::getNever();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
+ << "...\n");
+
+ CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);
+ bool ShouldInline = CA.analyzeCall(CS);
+
+ DEBUG(CA.dump());
+
+ // Check if there was a reason to force inlining or no inlining.
+ if (!ShouldInline && CA.getCost() < CA.getThreshold())
+ return InlineCost::getNever();
+ if (ShouldInline && CA.getCost() >= CA.getThreshold())
+ return InlineCost::getAlways();
+
+ return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+}
+
+bool llvm::isInlineViable(Function &F) {
+ bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain indirect branches or
+ // blockaddresses.
+ if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
+ return false;
+
+ for (auto &II : *BI) {
+ CallSite CS(&II);
+ if (!CS)
+ continue;
+
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+
+ // Disallow inlining functions that call @llvm.localescape. Doing this
+ // correctly would require major changes to the inliner.
+ if (CS.getCalledFunction() &&
+ CS.getCalledFunction()->getIntrinsicID() ==
+ llvm::Intrinsic::localescape)
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
new file mode 100644
index 0000000..de2b9c0
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -0,0 +1,88 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "instcount"
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/IR/Instruction.def"
+
+
+namespace {
+ class InstCount : public FunctionPass, public InstVisitor<InstCount> {
+ friend class InstVisitor<InstCount>;
+
+ void visitFunction (Function &F) { ++TotalFuncs; }
+ void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/IR/Instruction.def"
+
+ void visitInstruction(Instruction &I) {
+ errs() << "Instruction Count does not know about " << I;
+ llvm_unreachable(nullptr);
+ }
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ InstCount() : FunctionPass(ID) {
+ initializeInstCountPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ void print(raw_ostream &O, const Module *M) const override {}
+
+ };
+}
+
+char InstCount::ID = 0;
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
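+
+// Typical usage (illustrative): the pass is driven through 'opt' purely for
+// its statistics, e.g.
+//   opt -instcount -stats -disable-output input.ll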
+
+// InstCount::runOnFunction - This is the main analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+ unsigned StartMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ visit(F);
+ unsigned EndMemInsts =
+ NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+ NumInvokeInst + NumAllocaInst;
+ TotalMemInst += EndMemInsts-StartMemInsts;
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 0000000..b89ff26
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,4181 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
+// simplified; this is usually true, and assuming it simplifies the logic (if
+// they have not been simplified then results are correct but may be suboptimal).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "instsimplify"
+
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+namespace {
+struct Query {
+ const DataLayout &DL;
+ const TargetLibraryInfo *TLI;
+ const DominatorTree *DT;
+ AssumptionCache *AC;
+ const Instruction *CxtI;
+
+ Query(const DataLayout &DL, const TargetLibraryInfo *tli,
+ const DominatorTree *dt, AssumptionCache *ac = nullptr,
+ const Instruction *cxti = nullptr)
+ : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
+};
+} // end anonymous namespace
+
+static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+ const Query &, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
+
+/// getFalse - For a boolean type, or a vector of boolean type, return false, or
+/// a vector with every element false, as appropriate for the type.
+static Constant *getFalse(Type *Ty) {
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
+ "Expected i1 type or a vector of i1!");
+ return Constant::getNullValue(Ty);
+}
+
+/// getTrue - For a boolean type, or a vector of boolean type, return true, or
+/// a vector with every element true, as appropriate for the type.
+static Constant *getTrue(Type *Ty) {
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
+ "Expected i1 type or a vector of i1!");
+ return Constant::getAllOnesValue(Ty);
+}
+
+/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
+static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ CmpInst *Cmp = dyn_cast<CmpInst>(V);
+ if (!Cmp)
+ return false;
+ CmpInst::Predicate CPred = Cmp->getPredicate();
+ Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1);
+ if (CPred == Pred && CLHS == LHS && CRHS == RHS)
+ return true;
+ return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
+ CRHS == LHS;
+}
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ // Arguments and constants dominate all instructions.
+ return true;
+
+ // If we are processing instructions (and/or basic blocks) that have not been
+ // fully added to a function, the parent nodes may still be null. Simply
+ // return the conservative answer in these cases.
+ if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+ return false;
+
+ // If we have a DominatorTree then do a precise test.
+ if (DT) {
+ if (!DT->isReachableFromEntry(P->getParent()))
+ return true;
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return false;
+ return DT->dominates(I, P);
+ }
+
+ // Otherwise, if the instruction is in the entry block and is not an invoke,
+ // then it obviously dominates all phi nodes.
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ !isa<InvokeInst>(I))
+ return true;
+
+ return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExpand, const Query &Q,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ // Check whether the expression has the form "(A op' B) op C".
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (Op0->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op C) op' (B op C)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == B && R == A)) {
+ ++NumExpand;
+ return LHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ // Check whether the expression has the form "A op (B op' C)".
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (Op1->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op B) op' (A op C)".
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == C && R == B)) {
+ ++NumExpand;
+ return RHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ return nullptr;
+}
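+
+// Example of the expansion (hypothetical IR): simplifying
+//   ((%x & 3) | (%y & 3)) & 3
+// distributes the 'and' over the 'or'; (%x & 3) & 3 and (%y & 3) & 3 both
+// simplify back to the original operands, so "L op' R" equals "A op' B" and
+// the whole expression simplifies to the existing (%x & 3) | (%y & 3).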
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations. Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+ assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
+ // It does! Return "A op V" if it simplifies or is already available.
+ // If V equals B then "A op V" is just the LHS.
+ if (V == B) return LHS;
+ // Otherwise return "A op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
+ // It does! Return "V op C" if it simplifies or is already available.
+ // If V equals B then "V op C" is just the RHS.
+ if (V == B) return RHS;
+ // Otherwise return "V op C" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // The remaining transforms require commutativity as well as associativity.
+ if (!Instruction::isCommutative(Opcode))
+ return nullptr;
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+ // It does! Return "V op B" if it simplifies or is already available.
+ // If V equals A then "V op B" is just the LHS.
+ if (V == A) return LHS;
+ // Otherwise return "V op B" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+ // It does! Return "B op V" if it simplifies or is already available.
+ // If V equals C then "B op V" is just the RHS.
+ if (V == C) return RHS;
+ // Otherwise return "B op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ return nullptr;
+}
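+
+// Small instance of the first transform above (hypothetical IR):
+//   %a = and i32 %x, 3
+//   %r = and i32 %a, 3
+// Reassociating gives "B op C" = (3 & 3) = 3, which equals B, so %r
+// simplifies to the existing value %a with no new instructions created.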
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ SelectInst *SI;
+ if (isa<SelectInst>(LHS)) {
+ SI = cast<SelectInst>(LHS);
+ } else {
+ assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+ SI = cast<SelectInst>(RHS);
+ }
+
+ // Evaluate the BinOp on the true and false branches of the select.
+ Value *TV;
+ Value *FV;
+ if (SI == LHS) {
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
+ } else {
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
+ }
+
+ // If they simplified to the same value, then return the common value.
+ // If they both failed to simplify then return null.
+ if (TV == FV)
+ return TV;
+
+ // If one branch simplified to undef, return the other one.
+ if (TV && isa<UndefValue>(TV))
+ return FV;
+ if (FV && isa<UndefValue>(FV))
+ return TV;
+
+ // If applying the operation did not change the true and false select values,
+ // then the result of the binop is the select itself.
+ if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+ return SI;
+
+ // If one branch simplified and the other did not, and the simplified
+ // value is equal to the unsimplified one, return the simplified value.
+ // For example, select (cond, X, X & Z) & Z -> X & Z.
+ if ((FV && !TV) || (TV && !FV)) {
+ // Check that the simplified value has the form "X op Y" where "op" is the
+ // same as the original operation.
+ Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+ if (Simplified && Simplified->getOpcode() == Opcode) {
+ // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+ // We already know that "op" is the same as for the simplified value. See
+ // if the operands match too. If so, return the simplified value.
+ Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+ Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+ Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+ if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+ Simplified->getOperand(1) == UnsimplifiedRHS)
+ return Simplified;
+ if (Simplified->isCommutative() &&
+ Simplified->getOperand(1) == UnsimplifiedLHS &&
+ Simplified->getOperand(0) == UnsimplifiedRHS)
+ return Simplified;
+ }
+ }
+
+ return nullptr;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value. Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ // Make sure the select is on the LHS.
+ if (!isa<SelectInst>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+ SelectInst *SI = cast<SelectInst>(LHS);
+ Value *Cond = SI->getCondition();
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+
+ // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+ // Does "cmp TV, RHS" simplify?
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
+ if (TCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'true'.
+ TCmp = getTrue(Cond->getType());
+ } else if (!TCmp) {
+ // It didn't simplify. However if "cmp TV, RHS" is equal to the select
+ // condition then we can replace it with 'true'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, TV, RHS))
+ return nullptr;
+ TCmp = getTrue(Cond->getType());
+ }
+
+ // Does "cmp FV, RHS" simplify?
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
+ if (FCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'false'.
+ FCmp = getFalse(Cond->getType());
+ } else if (!FCmp) {
+ // It didn't simplify. However if "cmp FV, RHS" is equal to the select
+ // condition then we can replace it with 'false'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, FV, RHS))
+ return nullptr;
+ FCmp = getFalse(Cond->getType());
+ }
+
+ // If both sides simplified to the same value, then use it as the result of
+ // the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+
+ // The remaining cases only make sense if the select condition has the same
+ // type as the result of the comparison, so bail out if this is not so.
+ if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
+ return nullptr;
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
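+
+// Concrete instance (hypothetical IR):
+//   %s = select i1 %c, i32 0, i32 1
+//   %r = icmp eq i32 %s, 0
+// The true arm folds to 'true' (0 == 0) and the false arm to 'false'
+// (1 == 0), so the final "Cond && TCmp" rule collapses %r to %c itself.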
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ PHINode *PI;
+ if (isa<PHINode>(LHS)) {
+ PI = cast<PHINode>(LHS);
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
+ return nullptr;
+ } else {
+ assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+ PI = cast<PHINode>(RHS);
+ // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(LHS, PI, Q.DT))
+ return nullptr;
+ }
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = nullptr;
+ for (Value *Incoming : PI->incoming_values()) {
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = PI == LHS ?
+ SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // than it did previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return nullptr;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return nullptr;
+
+ // Make sure the phi is on the LHS.
+ if (!isa<PHINode>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+ PHINode *PI = cast<PHINode>(LHS);
+
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
+ return nullptr;
+
+ // Evaluate the comparison on the incoming phi values.
+ Value *CommonValue = nullptr;
+ for (Value *Incoming : PI->incoming_values()) {
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+ // than it did previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return nullptr;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// SimplifyAddInst - Given operands for an Add, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
+ Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X + undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X + (Y - X) -> Y
+ // (Y - X) + X -> Y
+ // Eg: X + -X -> 0
+ Value *Y = nullptr;
+ if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+ match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+ return Y;
+
+ // X + ~X -> -1 since ~X = -X-1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // i1 add -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Threading Add over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A + select(cond, B, C)" means evaluating
+ // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
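+
+// Sketch of a typical call site (hypothetical surrounding code, shown only to
+// illustrate the public entry point above):
+//   if (Value *V = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+//                                  /*isNSW=*/false, /*isNUW=*/false, DL, TLI,
+//                                  DT, AC, I))
+//     I->replaceAllUsesWith(V);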
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. V
+/// must have pointer type (this is asserted), and the constant '0' is returned
+/// if there are no constant offsets applied.
+///
+/// This is very similar to GetPointerBaseWithConstantOffset except that, by
+/// default, it does not follow non-inbounds geps. This allows it to remain
+/// usable for icmp ult/etc. folding.
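+///
+/// Illustrative example (assumed IR, 64-bit pointers):
+///   %p = getelementptr inbounds i32, i32* %base, i64 3
+///   %q = bitcast i32* %p to i8*
+/// Called on %q, this walks back to %base and returns the i64 constant 12
+/// (3 elements * 4 bytes).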
+static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
+ bool AllowNonInbounds = false) {
+ assert(V->getType()->getScalarType()->isPointerTy());
+
+ Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
+ APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if ((!AllowNonInbounds && !GEP->isInBounds()) ||
+ !GEP->accumulateConstantOffset(DL, Offset))
+ break;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->getScalarType()->isPointerTy() &&
+ "Unexpected operand type!");
+ } while (Visited.insert(V).second);
+
+ Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
+ if (V->getType()->isVectorTy())
+ return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
+ OffsetIntPtr);
+ return OffsetIntPtr;
+}
+
+/// \brief Compute the constant difference between two pointer values.
+/// If the difference is not a constant, returns null.
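+///
+/// For example (illustrative IR): with
+///   %p = getelementptr inbounds i64, i64* %base, i64 4
+///   %q = getelementptr inbounds i64, i64* %base, i64 1
+/// the difference %p - %q folds to the constant 24 (3 elements * 8 bytes).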
+static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
+ Value *RHS) {
+ Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
+ Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return nullptr;
+
+ // Otherwise, the difference of LHS - RHS can be computed as:
+ // LHS - RHS
+ // = (LHSOffset + Base) - (RHSOffset + Base)
+ // = LHSOffset - RHSOffset
+ return ConstantExpr::getSub(LHSOffset, RHSOffset);
+}
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0))
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return UndefValue::get(Op0->getType());
+
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X - X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 - X -> 0 if the sub is NUW.
+ if (isNUW && match(Op0, m_Zero()))
+ return Op0;
+
+ // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+ // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+ Value *X = nullptr, *Y = nullptr, *Z = Op1;
+ if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+ // See if "V === Y - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
+ // It does! Now see if "X + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+ // It does! Now see if "Y + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+ // For example, X - (X + 1) -> -1
+ X = Op0;
+ if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "V - Z" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+ // It does! Now see if "V - Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+ // For example, X - (X - Y) -> Y.
+ Z = Op0;
+ if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+ // See if "V === Z - X" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
+ // It does! Now see if "V + Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+
+ // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
+ if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
+ match(Op1, m_Trunc(m_Value(Y))))
+ if (X->getType() == Y->getType())
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "trunc V" simplifies.
+ if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ // It does, return the simplified "trunc V".
+ return W;
+
+ // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
+ if (match(Op0, m_PtrToInt(m_Value(X))) &&
+ match(Op1, m_PtrToInt(m_Value(Y))))
+ if (Constant *Result = computePointerDifference(Q.DL, X, Y))
+ return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+
+ // i1 sub -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+ return V;
+
+ // Threading Sub over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A - select(cond, B, C)" means evaluating
+ // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return nullptr;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// Given operands for an FAdd, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // fadd X, -0 ==> X
+ if (match(Op1, m_NegZero()))
+ return Op0;
+
+ // fadd X, 0 ==> X, when we know X is not -0
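+  // (the check is needed because fadd -0.0, 0.0 evaluates to +0.0, so the
+  // fold would change the sign of zero whenever X may be -0.0)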
+ if (match(Op1, m_Zero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+ return Op0;
+
+ // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+ // where nnan and ninf have to occur at least once somewhere in this
+ // expression
+ Value *SubOp = nullptr;
+ if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
+ SubOp = Op1;
+ else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
+ SubOp = Op0;
+ if (SubOp) {
+ Instruction *FSub = cast<Instruction>(SubOp);
+ if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
+ (FMF.noInfs() || FSub->hasNoInfs()))
+ return Constant::getNullValue(Op0->getType());
+ }
+
+ return nullptr;
+}
+
+/// Given operands for an FSub, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+ }
+
+ // fsub X, 0 ==> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // fsub X, -0 ==> X, when we know X is not -0
+ if (match(Op1, m_NegZero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+ return Op0;
+
+  // fsub 0, (fsub -0.0, X) ==> X
+  // fsub 0, (fsub 0.0, X) ==> X, when signed zeros are ignored
+ Value *X;
+ if (match(Op0, m_AnyZero())) {
+ if (match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+ return X;
+ if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+ return X;
+ }
+
+ // fsub nnan x, x ==> 0.0
+ if (FMF.noNaNs() && Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ return nullptr;
+}
+
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
+ FastMathFlags FMF,
+ const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+ return Op1;
+
+ return nullptr;
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X * undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X * 0 -> 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X * 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ // (X / Y) * Y -> X if the division is exact.
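+  // (the 'exact' flag is what makes this sound: without it, (7 /u 2) * 2
+  // would evaluate to 6, not 7, so the fold would be wrong)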
+ Value *X = nullptr;
+ if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ return X;
+
+ // i1 mul -> and.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+ Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SDiv;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X / 0 -> undef, we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op1->getType());
+
+ // undef / X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 / X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X / 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be division by zero, hence it must be division by one.
+ return Op0;
+
+ // X / X -> 1
+ if (Op0 == Op1)
+ return ConstantInt::get(Op0->getType(), 1);
+
+ // (X * Y) / Y -> X if the multiplication does not overflow.
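+  // (a no-wrap guarantee is required: in i8, (16 * 16) / 16 is 0 /u 16 = 0,
+  // not 16, because the multiply wraps)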
+ Value *X = nullptr, *Y = nullptr;
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+ if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+ OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
+ // If the Mul knows it does not overflow, then we are good to go.
+ if ((isSigned && Mul->hasNoSignedWrap()) ||
+ (!isSigned && Mul->hasNoUnsignedWrap()))
+ return X;
+ // If X has the form X = A / Y then X * Y cannot overflow.
+ if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+ if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+ return X;
+ }
+
+ // (X rem Y) / Y -> 0
+ if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X /u C1) /u C2 -> 0 if C1 * C2 overflow
+ ConstantInt *C1, *C2;
+ if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
+ match(Op1, m_ConstantInt(C2))) {
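+    // Only the overflow flag matters here; the product itself is discarded.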
+ bool Overflow;
+ C1->getValue().umul_ov(C2->getValue(), Overflow);
+ if (Overflow)
+ return Constant::getNullValue(Op0->getType());
+ }
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned) {
+ // undef / X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // 0 / X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
+ if (FMF.noNaNs()) {
+ // X / X -> 1.0 is legal when NaNs are ignored.
+ if (Op0 == Op1)
+ return ConstantFP::get(Op0->getType(), 1.0);
+
+ // -X / X -> -1.0 and
+ // X / -X -> -1.0 are legal when NaNs are ignored.
+ // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
+ if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
+ BinaryOperator::getFNegArgument(Op0) == Op1) ||
+ (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
+ BinaryOperator::getFNegArgument(Op1) == Op0))
+ return ConstantFP::get(Op0->getType(), -1.0);
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI);
+ }
+ }
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef % X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 % X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X % 0 -> undef, we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op0->getType());
+
+ // X % 1 -> 0
+ if (match(Op1, m_One()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be remainder by zero, hence it must be remainder by one.
+ return Constant::getNullValue(Op0->getType());
+
+ // X % X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // (X % Y) % Y -> X % Y
+ if ((Opcode == Instruction::SRem &&
+ match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Op0;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &, unsigned) {
+ // undef % X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X % undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // 0 % X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// isUndefShift - Returns true if a shift by \c Amount always yields undef.
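+/// For example, 'shl i32 %X, 32' may be folded to undef because the shift
+/// amount is not smaller than the bit width.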
+static bool isUndefShift(Value *Amount) {
+ Constant *C = dyn_cast<Constant>(Amount);
+ if (!C)
+ return false;
+
+ // X shift by undef -> undef because it may shift by the bitwidth.
+ if (isa<UndefValue>(C))
+ return true;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ if (CI->getValue().getLimitedValue() >=
+ CI->getType()->getScalarSizeInBits())
+ return true;
+
+  // If all lanes of a vector shift are undefined, the whole shift is.
+ if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) {
+ for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I)
+ if (!isUndefShift(C->getAggregateElement(I)))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI);
+ }
+ }
+
+ // 0 shift by X -> 0
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X shift by 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // Fold undefined shifts.
+ if (isUndefShift(Op1))
+ return UndefValue::get(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+/// \brief Given operands for an LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
+ bool isExact, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // X >> X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // undef >> X -> 0
+ // undef >> X -> undef (if it's exact)
+ if (match(Op0, m_Undef()))
+ return isExact ? Op0 : Constant::getNullValue(Op0->getType());
+
+ // The low bit cannot be shifted out of an exact shift if it is set.
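+  // (an exact shift promises that no non-zero bits are shifted out, so if the
+  // low bit is known to be one, the only permissible shift amount is zero and
+  // the result is Op0 itself)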
+ if (isExact) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ APInt Op0KnownZero(BitWidth, 0);
+ APInt Op0KnownOne(BitWidth, 0);
+ computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (Op0KnownOne[0])
+ return Op0;
+ }
+
+ return nullptr;
+}
+
+/// SimplifyShlInst - Given operands for an Shl, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // undef << X -> 0
+  // undef << X -> undef (if it's NSW/NUW)
+ if (match(Op0, m_Undef()))
+ return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType());
+
+ // (X >> A) << A -> X
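+  // The right-shift must be exact so that no set bits were discarded; without
+  // that, e.g. (5 >>u 1) << 1 would be 4, not 5.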
+ Value *X;
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
+ return X;
+ return nullptr;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
+ MaxRecurse))
+ return V;
+
+ // (X << A) >> A -> X
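+  // The shl must be nuw so the left-shift lost no bits; otherwise, in i8,
+  // (0x80 << 1) >>u 1 would be 0, not 0x80.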
+ Value *X;
+ if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
+ return X;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
+ MaxRecurse))
+ return V;
+
+ // all ones >>a X -> all ones
+ if (match(Op0, m_AllOnes()))
+ return Op0;
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
+ return X;
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return Op0;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
+ ICmpInst *UnsignedICmp, bool IsAnd) {
+ Value *X, *Y;
+
+ ICmpInst::Predicate EqPred;
+ if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) ||
+ !ICmpInst::isEquality(EqPred))
+ return nullptr;
+
+ ICmpInst::Predicate UnsignedPred;
+ if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
+ ICmpInst::isUnsigned(UnsignedPred))
+ ;
+ else if (match(UnsignedICmp,
+ m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) &&
+ ICmpInst::isUnsigned(UnsignedPred))
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+ else
+ return nullptr;
+
+ // X < Y && Y != 0 --> X < Y
+ // X < Y || Y != 0 --> Y != 0
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // X >= Y || Y != 0 --> true
+ // X >= Y || Y == 0 --> X >= Y
+ if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
+ if (EqPred == ICmpInst::ICMP_NE)
+ return getTrue(UnsignedICmp->getType());
+ return UnsignedICmp;
+ }
+
+ // X < Y && Y == 0 --> false
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
+ IsAnd)
+ return getFalse(UnsignedICmp->getType());
+
+ return nullptr;
+}
+
+// Simplify (and (icmp ...) (icmp ...)) to false when we can tell that the
+// range of possible values cannot satisfy both comparisons.
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ ConstantInt *CI1, *CI2;
+ Value *V;
+
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ return X;
+
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
+ m_ConstantInt(CI2))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt &CI1V = CI1->getValue();
+ const APInt &CI2V = CI2->getValue();
+ const APInt Delta = CI2V - CI1V;
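+  // As a concrete instance of the Delta == 2 case below: with CI1 = 1 and
+  // CI2 = 3, "(V+1) u< 3" confines V to {-1, 0, 1}, none of which satisfies
+  // "V s> 1", so the 'and' folds to false.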
+ if (CI1V.isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
+ return getFalse(ITy);
+ if (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT && isNSW)
+ return getFalse(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_SGT)
+ return getFalse(ITy);
+ if (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGT && isNSW)
+ return getFalse(ITy);
+ }
+ }
+ if (CI1V.getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ return nullptr;
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & 0 = 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X & -1 = X
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ // A & ~A = ~A & A = 0
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ Value *A = nullptr, *B = nullptr;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // A & (-A) = A if A is a power of two or zero.
+ if (match(Op0, m_Neg(m_Specific(Op1))) ||
+ match(Op1, m_Neg(m_Specific(Op0)))) {
+ if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
+ return Op0;
+ if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
+ return Op1;
+ }
+
+ if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
+ if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
+ if (Value *V = SimplifyAndOfICmps(ICILHS, ICIRHS))
+ return V;
+ if (Value *V = SimplifyAndOfICmps(ICIRHS, ICILHS))
+ return V;
+ }
+ }
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ Q, MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+ Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
+// contains all possible values.
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ ConstantInt *CI1, *CI2;
+ Value *V;
+
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ return X;
+
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
+ m_ConstantInt(CI2))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt &CI1V = CI1->getValue();
+ const APInt &CI2V = CI2->getValue();
+ const APInt Delta = CI2V - CI1V;
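+  // As a concrete instance of the Delta == 2 case below: with CI1 = 1 and
+  // CI2 = 3, "(V+1) u>= 3" excludes only V in {-1, 0, 1}, and those three
+  // values all satisfy "V s<= 1", so the 'or' covers everything and folds
+  // to true.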
+ if (CI1V.isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ }
+ if (CI1V.getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ return nullptr;
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | 0 = X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X | -1 = -1
+ if (match(Op1, m_AllOnes()))
+ return Op1;
+
+ // A | ~A = ~A | A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ Value *A = nullptr, *B = nullptr;
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ // ~(A & ?) | A = -1
+ if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op1 || B == Op1))
+ return Constant::getAllOnesValue(Op1->getType());
+
+ // A | ~(A & ?) = -1
+ if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+ (A == Op0 || B == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
+ if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
+ if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS))
+ return V;
+ if (Value *V = SimplifyOrOfICmps(ICIRHS, ICILHS))
+ return V;
+ }
+ }
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // (A & C)|(B & D)
+ Value *C = nullptr, *D = nullptr;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ ConstantInt *C1 = dyn_cast<ConstantInt>(C);
+ ConstantInt *C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2 && (C1->getValue() == ~C2->getValue())) {
+ // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ Value *V1, *V2;
+ if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B &&
+ MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return A;
+ if (V2 == B &&
+ MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return A;
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A &&
+ MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return B;
+ if (V2 == A &&
+ MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return B;
+ }
+ }
+ }
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+ Ops, Q.DL, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // A ^ undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // A ^ 0 = A
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // A ^ A = 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // A ^ ~A = ~A ^ A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
+ MaxRecurse))
+ return V;
+
+ // Threading Xor over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+ // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+static Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+/// ExtractEquivalentCondition - Rummage around inside V looking for something
+/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
+/// otherwise return null. Helper function for analyzing max/min idioms.
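+///
+/// For example, given the smax idiom V = select (icmp sgt %a, %b), %a, %b,
+/// asking for "%a sgt %b" (or the swapped "%b slt %a") returns the icmp.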
+static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI)
+ return nullptr;
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+ if (!Cmp)
+ return nullptr;
+ Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+ if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
+ return Cmp;
+ if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
+ LHS == CmpRHS && RHS == CmpLHS)
+ return Cmp;
+ return nullptr;
+}
+
+// A significant optimization not implemented here is assuming that alloca
+// addresses are not equal to incoming argument values. They don't *alias*,
+// as we say, but that doesn't mean they aren't equal, so we take a
+// conservative approach.
+//
+// This is inspired in part by C++11 5.10p1:
+// "Two pointers of the same type compare equal if and only if they are both
+// null, both point to the same function, or both represent the same
+// address."
+//
+// This is pretty permissive.
+//
+// It's also partly due to C11 6.5.9p6:
+// "Two pointers compare equal if and only if both are null pointers, both are
+// pointers to the same object (including a pointer to an object and a
+// subobject at its beginning) or function, both are pointers to one past the
+// last element of the same array object, or one is a pointer to one past the
+// end of one array object and the other is a pointer to the start of a
+// different array object that happens to immediately follow the first array
+// object in the address space."
+//
+// C11's version is more restrictive, however there's no reason why an argument
+// couldn't be a one-past-the-end value for a stack object in the caller and be
+// equal to the beginning of a stack object in the callee.
+//
+// If the C and C++ standards are ever made sufficiently restrictive in this
+// area, it may be possible to update LLVM's semantics accordingly and reinstate
+// this optimization.
+static Constant *computePointerICmp(const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ // First, skip past any trivial no-ops.
+ LHS = LHS->stripPointerCasts();
+ RHS = RHS->stripPointerCasts();
+
+ // A non-null pointer is not equal to a null pointer.
+ if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) &&
+ (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+
+ // We can only fold certain predicates on pointer comparisons.
+ switch (Pred) {
+ default:
+ return nullptr;
+
+  // Equality comparisons are easy to fold.
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ break;
+
+ // We can only handle unsigned relational comparisons because 'inbounds' on
+ // a GEP only protects against unsigned wrapping.
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ // However, we have to switch them to their signed variants to handle
+ // negative indices from the base pointer.
+ Pred = ICmpInst::getSignedPredicate(Pred);
+ break;
+ }
+
+ // Strip off any constant offsets so that we can reason about them.
+ // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets
+ // here and compare base addresses like AliasAnalysis does, however there are
+ // numerous hazards. AliasAnalysis and its utilities rely on special rules
+ // governing loads and stores which don't apply to icmps. Also, AliasAnalysis
+ // doesn't need to guarantee pointer inequality when it says NoAlias.
+ Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
+ Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
+
+ // If LHS and RHS are related via constant offsets to the same base
+ // value, we can replace it with an icmp which just compares the offsets.
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+
+ // Various optimizations for (in)equality comparisons.
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+ // Different non-empty allocations that exist at the same time have
+ // different addresses (if the program can tell). Global variables always
+ // exist, so they always exist during the lifetime of each other and all
+ // allocas. Two different allocas usually have different addresses...
+ //
+ // However, if there's an @llvm.stackrestore dynamically in between two
+ // allocas, they may have the same address. It's tempting to reduce the
+ // scope of the problem by only looking at *static* allocas here. That would
+ // cover the majority of allocas while significantly reducing the likelihood
+ // of having an @llvm.stackrestore pop up in the middle. However, it's not
+ // actually impossible for an @llvm.stackrestore to pop up in the middle of
+ // an entry block. Also, if we have a block that's not attached to a
+ // function, we can't tell if it's "static" under the current definition.
+ // Theoretically, this problem could be fixed by creating a new kind of
+  // instruction specifically for static allocas. Such a new instruction
+ // could be required to be at the top of the entry block, thus preventing it
+ // from being subject to a @llvm.stackrestore. Instcombine could even
+ // convert regular allocas into these special allocas. It'd be nifty.
+ // However, until then, this problem remains open.
+ //
+ // So, we'll assume that two non-empty allocas have different addresses
+ // for now.
+ //
+ // With all that, if the offsets are within the bounds of their allocations
+ // (and not one-past-the-end! so we can't use inbounds!), and their
+ // allocations aren't the same, the pointers are not equal.
+ //
+ // Note that it's not necessary to check for LHS being a global variable
+ // address, due to canonicalization and constant folding.
+ if (isa<AllocaInst>(LHS) &&
+ (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
+ ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
+ ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
+ uint64_t LHSSize, RHSSize;
+ if (LHSOffsetCI && RHSOffsetCI &&
+ getObjectSize(LHS, LHSSize, DL, TLI) &&
+ getObjectSize(RHS, RHSSize, DL, TLI)) {
+ const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
+ const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
+ if (!LHSOffsetValue.isNegative() &&
+ !RHSOffsetValue.isNegative() &&
+ LHSOffsetValue.ult(LHSSize) &&
+ RHSOffsetValue.ult(RHSSize)) {
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+ }
+
+ // Repeat the above check but this time without depending on DataLayout
+ // or being able to compute a precise size.
+ if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
+ !cast<PointerType>(RHS->getType())->isEmptyTy() &&
+ LHSOffset->isNullValue() &&
+ RHSOffset->isNullValue())
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+
+  // Even if a non-inbounds GEP occurs along the path we can still optimize
+  // equality comparisons concerning the result. We avoid walking the whole
+  // chain again by starting where the last calls to
+  // stripAndComputeConstantOffsets left off and accumulating the offsets.
+ Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true);
+ Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true);
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred,
+ ConstantExpr::getAdd(LHSOffset, LHSNoBound),
+ ConstantExpr::getAdd(RHSOffset, RHSNoBound));
+
+ // If one side of the equality comparison must come from a noalias call
+ // (meaning a system memory allocation function), and the other side must
+ // come from a pointer that cannot overlap with dynamically-allocated
+ // memory within the lifetime of the current function (allocas, byval
+ // arguments, globals), then determine the comparison result here.
+ SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
+ GetUnderlyingObjects(LHS, LHSUObjs, DL);
+ GetUnderlyingObjects(RHS, RHSUObjs, DL);
+
+ // Is the set of underlying objects all noalias calls?
+ auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
+ return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);
+ };
+
+ // Is the set of underlying objects all things which must be disjoint from
+ // noalias calls. For allocas, we consider only static ones (dynamic
+ // allocas might be transformed into calls to malloc not simultaneously
+ // live with the compared-to allocation). For globals, we exclude symbols
+  // that might be resolved lazily to symbols in another dynamically-loaded
+ // library (and, thus, could be malloc'ed by the implementation).
+ auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
+ return std::all_of(Objects.begin(), Objects.end(),
+ [](Value *V){
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI->getParent() && AI->getParent()->getParent() &&
+ AI->isStaticAlloca();
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return (GV->hasLocalLinkage() ||
+ GV->hasHiddenVisibility() ||
+ GV->hasProtectedVisibility() ||
+ GV->hasUnnamedAddr()) &&
+ !GV->isThreadLocal();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+ });
+ };
+
+ if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
+ (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+
+ // Otherwise, fail.
+ return nullptr;
+}
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *OpTy = LHS->getType(); // The operand type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Special case logic when the operands have i1 type.
+ if (OpTy->getScalarType()->isIntegerTy(1)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ if (isImpliedCondition(RHS, LHS, Q.DL))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+      // For signed comparison, the values for an i1 are 0 and -1
+      // respectively. This maps into a truth table of:
+      //   LHS | RHS | LHS >=s RHS  | LHS implies RHS
+      //    0  |  0  | 1 (0 >= 0)   | 1
+      //    0  |  1  | 1 (0 >= -1)  | 1
+      //    1  |  0  | 0 (-1 >= 0)  | 0
+      //    1  |  1  | 1 (-1 >= -1) | 1
+ if (isImpliedCondition(LHS, RHS, Q.DL))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondition(LHS, RHS, Q.DL))
+ return getTrue(ITy);
+ break;
+ }
+ }
+
+ // If we are comparing with zero then try hard since this is a common case.
+ if (match(RHS, m_Zero())) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative)
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative)
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
+ }
+ }
+
+ // See if we are doing a comparison with a constant integer.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Rule out tautological comparisons (e.g., ult 0 or uge 0).
+ ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(CI->getContext());
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(CI->getContext());
+
+ // Many binary operators with a constant RHS have an easy-to-compute
+ // constant range. Use it to check whether the comparison is a tautology.
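+ // For example, 'icmp ult (urem i32 %X, 10), 10' is always true because
+ // the urem can only produce values in [0, 10).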
+ unsigned Width = CI->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ ConstantInt *CI2;
+ if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'urem x, CI2' produces [0, CI2).
+ Upper = CI2->getValue();
+ } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+ // 'srem x, CI2' produces (-|CI2|, |CI2|).
+ Upper = CI2->getValue().abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
+ // 'udiv CI2, x' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+ // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (!CI2->isZero())
+ Upper = NegOne.udiv(CI2->getValue()) + 1;
+ } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
+ if (CI2->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = CI2->getValue();
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
+ Upper = CI2->getValue().abs() + 1;
+ Lower = (-Upper) + 1;
+ }
+ } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ APInt Val = CI2->getValue();
+ if (Val.isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]; INT_MIN is excluded
+ // because negating it overflows.
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (Val.countLeadingZeros() < Width - 1) {
+ // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]
+ // where CI2 != -1 and CI2 != 0 and CI2 != 1
+ Lower = IntMin.sdiv(Val);
+ Upper = IntMax.sdiv(Val);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
+ }
+ } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) {
+ // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)]
+ Lower = CI2->getValue();
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) {
+ if (CI2->isNegative()) {
+ // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2]
+ unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1;
+ Lower = CI2->getValue().shl(ShiftAmount);
+ Upper = CI2->getValue() + 1;
+ } else {
+ // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1]
+ unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1;
+ Lower = CI2->getValue();
+ Upper = CI2->getValue().shl(ShiftAmount) + 1;
+ }
+ } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (CI2->getValue().ult(Width))
+ Upper = NegOne.lshr(CI2->getValue()) + 1;
+ } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) {
+ // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2].
+ unsigned ShiftAmount = Width - 1;
+ if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = CI2->getValue().countTrailingZeros();
+ Lower = CI2->getValue().lshr(ShiftAmount);
+ Upper = CI2->getValue() + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+ // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (CI2->getValue().ult(Width)) {
+ Lower = IntMin.ashr(CI2->getValue());
+ Upper = IntMax.ashr(CI2->getValue()) + 1;
+ }
+ } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) {
+ unsigned ShiftAmount = Width - 1;
+ if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = CI2->getValue().countTrailingZeros();
+ if (CI2->isNegative()) {
+ // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)]
+ Lower = CI2->getValue();
+ Upper = CI2->getValue().ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2]
+ Lower = CI2->getValue().ashr(ShiftAmount);
+ Upper = CI2->getValue() + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+ // 'or x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
+ } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+ // 'and x, CI2' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
+ } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) {
+ // 'add nuw x, CI2' produces [CI2, UINT_MAX].
+ Lower = CI2->getValue();
+ }
+
+ ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper)
+ : ConstantRange(Width, true);
+
+ if (auto *I = dyn_cast<Instruction>(LHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
+
+ if (!LHS_CR.isFullSet()) {
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // If both operands have range metadata, use the metadata
+ // to simplify the comparison.
+ if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
+ auto RHS_Instr = dyn_cast<Instruction>(RHS);
+ auto LHS_Instr = dyn_cast<Instruction>(LHS);
+
+ if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
+ LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ auto RHS_CR = getConstantRangeFromMetadata(
+ *RHS_Instr->getMetadata(LLVMContext::MD_range));
+ auto LHS_CR = getConstantRangeFromMetadata(
+ *LHS_Instr->getMetadata(LLVMContext::MD_range));
+
+ auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
+ if (Satisfied_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+
+ auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
+ CmpInst::getInversePredicate(Pred), RHS_CR);
+ if (InversedSatisfied_CR.contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ Type *SrcTy = SrcOp->getType();
+ Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), Q,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
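+ // For example, 'icmp ult (zext i8 %X to i32), 256' is always true: 256
+ // truncates to 0 and re-extends to 0 != 256, so the zext'd value is
+ // always <u the constant.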
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would result if we truncated to SrcTy and
+ // then re-extended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
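+ // For example, 'icmp slt (sext i8 %X to i32), 128' is always true: the
+ // sext'd value lies in [-128, 127], while 128 does not survive the
+ // trunc/re-extend round trip, so the result depends only on the sign
+ // of the constant.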
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would result if we truncated to SrcTy and
+ // then re-extended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all copies of its sign bit, while
+ // RHS is not a sign-extended SrcTy value. Use this to work out the
+ // result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // icmp eq|ne X, Y -> false|true if X != Y
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ LLVMContext &Ctx = LHS->getType()->getContext();
+ return Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ }
+
+ // Special logic for binary operators.
+ BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+ if (MaxRecurse && (LBO || RBO)) {
+ // Analyze the case when either LHS or RHS is an add instruction.
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+ if (LBO && LBO->getOpcode() == Instruction::Add) {
+ A = LBO->getOperand(0); B = LBO->getOperand(1);
+ NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ }
+ if (RBO && RBO->getOpcode() == Instruction::Add) {
+ C = RBO->getOperand(0); D = RBO->getOperand(1);
+ NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ }
+
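+ // For example, 'icmp ult (add nuw %X, 1), %X' folds to 'icmp ult 1, 0'
+ // -> false below; the nuw flag is what rules out a wrap back to zero.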
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ Constant::getNullValue(RHS->getType()),
+ Q, MaxRecurse-1))
+ return V;
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred,
+ Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, Q, MaxRecurse-1))
+ return V;
+
+ // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoLHSWrapProblem && NoRHSWrapProblem) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
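+ // 'or' can only set bits and 'and' can only clear bits, so
+ // (or X, Y) >=u X and (and X, Y) <=u X; the next four folds follow.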
+ // icmp pred (or X, Y), X
+ if (LBO && match(LBO, m_CombineOr(m_Or(m_Value(), m_Specific(RHS)),
+ m_Or(m_Specific(RHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ }
+ // icmp pred X, (or X, Y)
+ if (RBO && match(RBO, m_CombineOr(m_Or(m_Value(), m_Specific(LHS)),
+ m_Or(m_Specific(LHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ // icmp pred (and X, Y), X
+ if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)),
+ m_And(m_Specific(RHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+ // icmp pred X, (and X, Y)
+ if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)),
+ m_And(m_Specific(LHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ }
+
+ // 0 - (zext X) pred C
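+ // The negated zext is never positive, so e.g.
+ // 'icmp slt (sub i32 0, (zext i8 %X to i32)), 1' is always true.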
+ if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ if (RHSC->getValue().isStrictlyPositive()) {
+ if (Pred == ICmpInst::ICMP_SLT)
+ return ConstantInt::getTrue(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_SGE)
+ return ConstantInt::getFalse(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(RHSC->getContext());
+ }
+ if (RHSC->getValue().isNonNegative()) {
+ if (Pred == ICmpInst::ICMP_SLE)
+ return ConstantInt::getTrue(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_SGT)
+ return ConstantInt::getFalse(RHSC->getContext());
+ }
+ }
+ }
+
+ // icmp pred (urem X, Y), Y
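+ // The remainder is always <u the divisor (urem is undefined when the
+ // divisor is zero, so that case cannot invalidate the fold); e.g.
+ // 'icmp ult (urem i32 %X, %Y), %Y' -> true.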
+ if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return getTrue(ITy);
+ }
+ }
+
+ // icmp pred X, (urem Y, X)
+ if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+ bool KnownNonNegative, KnownNegative;
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (!KnownNonNegative)
+ break;
+ // fall-through
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return getFalse(ITy);
+ }
+ }
+
+ // x udiv y <=u x.
+ if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
+ // icmp pred (X /u Y), X
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ // Handle:
+ // CI2 << X == CI
+ // CI2 << X != CI
+ //
+ // where CI2 is a power of 2. (When CI2 is one, the unsigned comparisons
+ // against the sign bit are folded as well.)
+ if (auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ const APInt *CI2Val, *CIVal = &CI->getValue();
+ if (LBO && match(LBO, m_Shl(m_APInt(CI2Val), m_Value())) &&
+ CI2Val->isPowerOf2()) {
+ if (!CIVal->isPowerOf2()) {
+ // CI2 << X can equal zero in some circumstances, so this
+ // simplification is unsafe if CI is zero.
+ //
+ // We know it is safe if:
+ // - The shift is nsw: we can't shift out the one bit.
+ // - The shift is nuw: we can't shift out the one bit.
+ // - CI2 is one.
+ // - CI isn't zero.
+ if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
+ *CI2Val == 1 || !CI->isZero()) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(RHS->getContext());
+ if (Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ if (CIVal->isSignBit() && *CI2Val == 1) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return ConstantInt::getFalse(RHS->getContext());
+ if (Pred == ICmpInst::ICMP_ULE)
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ }
+
+ if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+ LBO->getOperand(1) == RBO->getOperand(1)) {
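+ // Both sides apply the same operation to the same right-hand operand,
+ // so when the flags below make the operation reversible the comparison
+ // can be evaluated on the first operands instead; e.g.
+ // 'icmp eq (udiv exact %A, %C), (udiv exact %B, %C)' -> 'icmp eq %A, %B'.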
+ switch (LBO->getOpcode()) {
+ default: break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (ICmpInst::isSigned(Pred))
+ break;
+ // fall-through
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!LBO->isExact() || !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse-1))
+ return V;
+ break;
+ case Instruction::Shl: {
+ bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+ bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && ICmpInst::isSigned(Pred))
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+
+ // Simplify comparisons involving max/min.
+ Value *A, *B;
+ CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+ CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+ // Signed variants on "max(a,b)>=a -> true".
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+ // We analyze this as smax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) swapped-pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+ // We analyze this as smax(-A, -B) pred -A.
+ // Note that we do not need to actually form -A or -B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_SLE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_SGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_SGE:
+ // Always true.
+ return getTrue(ITy);
+ case CmpInst::ICMP_SLT:
+ // Always false.
+ return getFalse(ITy);
+ }
+ }
+
+ // Unsigned variants on "max(a,b)>=a -> true".
+ P = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) pred A.
+ P = Pred;
+ } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
+ // We analyze this as umax(A, B) swapped-pred A.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == RHS || B == RHS)) {
+ if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(~A, ~B) swapped-pred ~A.
+ // Note that we do not need to actually form ~A or ~B thanks to EqP.
+ P = CmpInst::getSwappedPredicate(Pred);
+ } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
+ (A == LHS || B == LHS)) {
+ if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
+ // We analyze this as umax(~A, ~B) pred ~A.
+ // Note that we do not need to actually form ~A or ~B thanks to EqP.
+ P = Pred;
+ }
+ if (P != CmpInst::BAD_ICMP_PREDICATE) {
+ // Cases correspond to "max(A, B) p A".
+ switch (P) {
+ default:
+ break;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_ULE:
+ // Equivalent to "A EqP B". This may be the same as the condition tested
+ // in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ return V;
+ // Otherwise, see if "A EqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_UGT: {
+ CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+ // Equivalent to "A InvEqP B". This may be the same as the condition
+ // tested in the max/min; if so, we can just return that.
+ if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ return V;
+ if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ return V;
+ // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ case CmpInst::ICMP_UGE:
+ // Always true.
+ return getTrue(ITy);
+ case CmpInst::ICMP_ULT:
+ // Always false.
+ return getFalse(ITy);
+ }
+ }
+
+ // Variants on "max(x,y) >= min(x,z)".
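+ // A max over X always dominates a min over X, e.g.
+ // 'icmp sge (smax %X, %Y), (smin %X, %Z)' -> true.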
+ Value *C, *D;
+ if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_SGE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_SLT)
+ // Always false.
+ return getFalse(ITy);
+ } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_SLE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_SGT)
+ // Always false.
+ return getFalse(ITy);
+ } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // max(x, ?) pred min(x, ?).
+ if (Pred == CmpInst::ICMP_UGE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_ULT)
+ // Always false.
+ return getFalse(ITy);
+ } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
+ match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
+ (A == C || A == D || B == C || B == D)) {
+ // min(x, ?) pred max(x, ?).
+ if (Pred == CmpInst::ICMP_ULE)
+ // Always true.
+ return getTrue(ITy);
+ if (Pred == CmpInst::ICMP_UGT)
+ // Always false.
+ return getFalse(ITy);
+ }
+
+ // Simplify comparisons of related pointers using a powerful, recursive
+ // GEP-walk when we have target data available.
+ if (LHS->getType()->isPointerTy())
+ if (Constant *C = computePointerICmp(Q.DL, Q.TLI, Pred, LHS, RHS))
+ return C;
+
+ if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
+ if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
+ if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
+ GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() &&
+ (ICmpInst::isEquality(Pred) ||
+ (GLHS->isInBounds() && GRHS->isInBounds() &&
+ Pred == ICmpInst::getSignedPredicate(Pred)))) {
+ // The bases are equal and the indices are constant. Build a constant
+ // expression GEP with the same indices and a null base pointer to see
+ // what constant folding can make out of it.
+ Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
+ SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesLHS);
+
+ SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesRHS);
+ return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
+ }
+ }
+ }
+
+ // If a bit is known to be zero for A and known to be one for B,
+ // then A and B cannot be equal.
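+ // For example, 'icmp eq (or %X, 4), 0' -> false: bit 2 is known to be
+ // one on the left and zero on the right.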
+ if (ICmpInst::isEquality(Pred)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ uint32_t BitWidth = CI->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
+ Q.CxtI, Q.DT);
+ const APInt &RHSVal = CI->getValue();
+ if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
+ return Pred == ICmpInst::ICMP_EQ
+ ? ConstantInt::getFalse(CI->getContext())
+ : ConstantInt::getTrue(CI->getContext());
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const Query &Q,
+ unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ // UNO/ORD predicates can be trivially folded if NaNs are ignored.
+ if (FMF.noNaNs()) {
+ if (Pred == FCmpInst::FCMP_UNO)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_ORD)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ }
+
+ // fcmp pred x, undef and fcmp pred undef, x
+ // fold to true if unordered, false if ordered
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
+ // Choosing NaN for the undef will always make unordered comparison succeed
+ // and ordered comparison fail.
+ return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ }
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
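+ // For example, 'fcmp ueq %X, %X' -> true and 'fcmp one %X, %X' -> false,
+ // but 'fcmp oeq %X, %X' is left alone because %X may be a NaN.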
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // Ordered predicates are false on NaN.
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with, or at least, negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with, or at most, positive infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
+ if (CFP->getValueAPF().isZero()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_UGE:
+ // LHS is NaN or non-negative, so "unordered or >= 0" always holds.
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getTrue(CFP->getContext());
+ break;
+ case FCmpInst::FCMP_OLT:
+ // LHS is never both ordered and less than zero.
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getFalse(CFP->getContext());
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF,
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+}
+
+/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
+/// replaced with RepOp.
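+ /// For example, (add %X, %Y) with %Y replaced by 0 simplifies to %X. The
+ /// select folding below uses this to prove that both arms of a select
+ /// compute the same value under the select's condition.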
+static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+ const Query &Q,
+ unsigned MaxRecurse) {
+ // Trivial replacement.
+ if (V == Op)
+ return RepOp;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return nullptr;
+
+ // If this is a binary operator, try to simplify it with the replaced op.
+ if (auto *B = dyn_cast<BinaryOperator>(I)) {
+ // Consider:
+ // %cmp = icmp eq i32 %x, 2147483647
+ // %add = add nsw i32 %x, 1
+ // %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ //
+ // We can't replace %sel with %add unless we strip away the flags.
+ if (isa<OverflowingBinaryOperator>(B))
+ if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap())
+ return nullptr;
+ if (isa<PossiblyExactOperator>(B))
+ if (B->isExact())
+ return nullptr;
+
+ if (MaxRecurse) {
+ if (B->getOperand(0) == Op)
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q,
+ MaxRecurse - 1);
+ if (B->getOperand(1) == Op)
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q,
+ MaxRecurse - 1);
+ }
+ }
+
+ // Same for CmpInsts.
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ if (MaxRecurse) {
+ if (C->getOperand(0) == Op)
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q,
+ MaxRecurse - 1);
+ if (C->getOperand(1) == Op)
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q,
+ MaxRecurse - 1);
+ }
+ }
+
+ // TODO: We could hand off more cases to instsimplify here.
+
+ // If all operands are constant after substituting Op for RepOp then we can
+ // constant fold the instruction.
+ if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
+ // Build a list of all constant operands.
+ SmallVector<Constant *, 8> ConstOps;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (I->getOperand(i) == Op)
+ ConstOps.push_back(CRepOp);
+ else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
+ ConstOps.push_back(COp);
+ else
+ break;
+ }
+
+ // All operands were constants, fold it.
+ if (ConstOps.size() == I->getNumOperands()) {
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
+ ConstOps[1], Q.DL, Q.TLI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], Q.DL);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+ Q.DL, Q.TLI);
+ }
+ }
+
+ return nullptr;
+}
+
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result. If not, this returns null.
+static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (Constant *CB = dyn_cast<Constant>(CondVal)) {
+ if (CB->isAllOnesValue())
+ return TrueVal;
+ if (CB->isNullValue())
+ return FalseVal;
+ }
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return TrueVal;
+
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return TrueVal;
+ return FalseVal;
+ }
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
+
+ if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
+ unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType());
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ APInt MinSignedValue = APInt::getSignBit(BitWidth);
+ Value *X;
+ const APInt *Y;
+ bool TrueWhenUnset;
+ bool IsBitTest = false;
+ if (ICmpInst::isEquality(Pred) &&
+ match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) &&
+ match(CmpRHS, m_Zero())) {
+ IsBitTest = true;
+ TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
+ } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
+ X = CmpLHS;
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = false;
+ } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
+ X = CmpLHS;
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = true;
+ }
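+ // The two signed comparisons above are sign-bit tests in disguise:
+ // 'icmp slt %X, 0' is true exactly when the sign bit is set, so they
+ // feed the same bit-test folds with Y as the sign bit.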
+ if (IsBitTest) {
+ const APInt *C;
+ // (X & Y) == 0 ? X & ~Y : X --> X
+ // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
+ if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+ // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
+ // (X & Y) != 0 ? X : X & ~Y --> X
+ if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ if (Y->isPowerOf2()) {
+ // (X & Y) == 0 ? X | Y : X --> X | Y
+ // (X & Y) != 0 ? X | Y : X --> X
+ if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ // (X & Y) == 0 ? X : X | Y --> X
+ // (X & Y) != 0 ? X : X | Y --> X | Y
+ if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ }
+ }
+ if (ICI->hasOneUse()) {
+ const APInt *C;
+ if (match(CmpRHS, m_APInt(C))) {
+ // X <s SMIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
+ return FalseVal;
+ // X <u UMIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
+ return FalseVal;
+ // X >s SMAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
+ return FalseVal;
+ // X >u UMAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
+ return FalseVal;
+ return FalseVal;
+ }
+ }
+
+ // If we have an equality comparison then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return FalseVal;
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return FalseVal;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return TrueVal;
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return TrueVal;
+ }
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+}
+
+ /// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+ const Query &Q, unsigned) {
+ // The address space of the GEP pointer operand.
+ unsigned AS =
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
+
+ // getelementptr P -> P.
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // Compute the (pointer) type returned by the GEP instruction.
+ Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
+ Type *GEPTy = PointerType::get(LastType, AS);
+ if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
+ GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+
+ if (isa<UndefValue>(Ops[0]))
+ return UndefValue::get(GEPTy);
+
+ if (Ops.size() == 2) {
+ // getelementptr P, 0 -> P.
+ if (match(Ops[1], m_Zero()))
+ return Ops[0];
+
+ Type *Ty = SrcTy;
+ if (Ty->isSized()) {
+ Value *P;
+ uint64_t C;
+ uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty);
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TyAllocSize == 0)
+ return Ops[0];
+
+ // The following transforms are only safe if the ptrtoint cast
+ // doesn't truncate the pointers.
+ if (Ops[1]->getType()->getScalarSizeInBits() ==
+ Q.DL.getPointerSizeInBits(AS)) {
+ auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
+ if (match(P, m_Zero()))
+ return Constant::getNullValue(GEPTy);
+ Value *Temp;
+ if (match(P, m_PtrToInt(m_Value(Temp))))
+ if (Temp->getType() == GEPTy)
+ return Temp;
+ return nullptr;
+ };
+
+ // getelementptr V, (sub P, V) -> Q where P is (ptrtoint Q), if V
+ // points to a type of size 1.
+ if (TyAllocSize == 1 &&
+ match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0])))))
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+
+ // getelementptr V, (ashr (sub P, V), C) -> Q where P is (ptrtoint Q),
+ // if V points to a type of size 1 << C.
+ if (match(Ops[1],
+ m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
+ m_ConstantInt(C))) &&
+ TyAllocSize == 1ULL << C)
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+
+ // getelementptr V, (sdiv (sub P, V), C) -> Q where P is (ptrtoint Q),
+ // if V points to a type of size C.
+ if (match(Ops[1],
+ m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
+ m_SpecificInt(TyAllocSize))))
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
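+ // Each of the three folds above recovers the pointer behind the
+ // ptrtoint, e.g. 'getelementptr i8, i8* %V, (sub (ptrtoint i8* %Q),
+ // (ptrtoint i8* %V))' -> '%Q'.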
+ }
+ }
+ }
+
+ // Check to see if this is constant foldable.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (!isa<Constant>(Ops[i]))
+ return nullptr;
+
+ return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+ Ops.slice(1));
+}
+
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyGEPInst(
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(),
+ Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+}
+
+/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs, const Query &Q,
+ unsigned) {
+ if (Constant *CAgg = dyn_cast<Constant>(Agg))
+ if (Constant *CVal = dyn_cast<Constant>(Val))
+ return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
+
+ // insertvalue x, undef, n -> x
+ if (match(Val, m_Undef()))
+ return Agg;
+
+ // insertvalue x, (extractvalue y, n), n
+ if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val))
+ if (EV->getAggregateOperand()->getType() == Agg->getType() &&
+ EV->getIndices() == Idxs) {
+ // insertvalue undef, (extractvalue y, n), n -> y
+ if (match(Agg, m_Undef()))
+ return EV->getAggregateOperand();
+
+ // insertvalue y, (extractvalue y, n), n -> y
+ if (Agg == EV->getAggregateOperand())
+ return Agg;
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyInsertValueInst(
+ Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const Query &, unsigned) {
+ if (auto *CAgg = dyn_cast<Constant>(Agg))
+ return ConstantFoldExtractValueInstruction(CAgg, Idxs);
+
+ // extractvalue (insertvalue y, elt, n), n -> elt
+ unsigned NumIdxs = Idxs.size();
+ for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr;
+ IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) {
+ ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices();
+ unsigned NumInsertValueIdxs = InsertValueIdxs.size();
+ unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs);
+ if (InsertValueIdxs.slice(0, NumCommonIdxs) ==
+ Idxs.slice(0, NumCommonIdxs)) {
+ if (NumIdxs == NumInsertValueIdxs)
+ return IVI->getInsertedValueOperand();
+ break;
+ }
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
+ unsigned) {
+ if (auto *CVec = dyn_cast<Constant>(Vec)) {
+ if (auto *CIdx = dyn_cast<Constant>(Idx))
+ return ConstantFoldExtractElementInstruction(CVec, CIdx);
+
+ // The index is not relevant if our vector is a splat.
+ if (auto *Splat = CVec->getSplatValue())
+ return Splat;
+
+ if (isa<UndefValue>(Vec))
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
+ if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
+ return Elt;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyExtractElementInst(
+ Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
+ // If all of the PHI's incoming values are the same then replace the PHI node
+ // with the common value.
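+ // For example, 'phi i32 [ %X, %bb0 ], [ %X, %bb1 ]' -> %X, and an undef
+ // input does not block the fold (subject to the dominance check below).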
+ Value *CommonValue = nullptr;
+ bool HasUndefInput = false;
+ for (Value *Incoming : PN->incoming_values()) {
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PN) continue;
+ if (isa<UndefValue>(Incoming)) {
+ // Remember that we saw an undef value, but otherwise ignore them.
+ HasUndefInput = true;
+ continue;
+ }
+ if (CommonValue && Incoming != CommonValue)
+ return nullptr; // Not the same, bail out.
+ CommonValue = Incoming;
+ }
+
+ // If CommonValue is null then all of the incoming values were either undef or
+ // equal to the phi node itself.
+ if (!CommonValue)
+ return UndefValue::get(PN->getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
+ if (HasUndefInput)
+ return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
+
+ return CommonValue;
+}
+
+static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+ if (Constant *C = dyn_cast<Constant>(Op))
+ return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI);
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ Q, MaxRecurse);
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
+ case Instruction::Sub:
+ return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem:
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::Shl:
+ return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ Q, MaxRecurse);
+ case Instruction::LShr:
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::AShr:
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.DL,
+ Q.TLI);
+ }
+
+ // If the operation is associative, try some generic simplifications.
+ if (Instruction::isAssociative(Opcode))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ return nullptr;
+ }
+}
+
+/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+ /// In contrast to SimplifyBinOp, try to use FastMathFlags when folding the
+ /// result. In case we don't need FastMathFlags, simply fall back to
+ /// SimplifyBinOp.
+static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const Query &Q,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ default:
+ return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
+ }
+}
+
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+ /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+ /// fold the result. If not, this returns null.
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+}
+
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+static bool IsIdempotent(Intrinsic::ID ID) {
+ switch (ID) {
+ default: return false;
+
+ // Unary idempotent: f(f(x)) = f(x)
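+ // e.g. fabs(fabs(x)) == fabs(x), so the inner call's result can be
+ // returned directly when it feeds a second identical call.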
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ return true;
+ }
+}
+
+template <typename IterTy>
+static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
+ const Query &Q, unsigned MaxRecurse) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
+ Type *ReturnType = F->getReturnType();
+
+ // Binary Ops
+ if (NumOperands == 2) {
+ Value *LHS = *ArgBegin;
+ Value *RHS = *(ArgBegin + 1);
+ if (IID == Intrinsic::usub_with_overflow ||
+ IID == Intrinsic::ssub_with_overflow) {
+ // X - X -> { 0, false }
+ if (LHS == RHS)
+ return Constant::getNullValue(ReturnType);
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
+ return UndefValue::get(ReturnType);
+ }
+
+ if (IID == Intrinsic::uadd_with_overflow ||
+ IID == Intrinsic::sadd_with_overflow) {
+ // X + undef -> undef
+ if (isa<UndefValue>(RHS))
+ return UndefValue::get(ReturnType);
+ }
+
+ if (IID == Intrinsic::umul_with_overflow ||
+ IID == Intrinsic::smul_with_overflow) {
+ // X * 0 -> { 0, false }
+ if (match(RHS, m_Zero()))
+ return Constant::getNullValue(ReturnType);
+
+ // X * undef -> { 0, false }
+ if (match(RHS, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ }
+ }
+
+ // Perform idempotent optimizations
+ if (!IsIdempotent(IID))
+ return nullptr;
+
+ // Unary Ops
+ if (NumOperands == 1)
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+ if (II->getIntrinsicID() == IID)
+ return II;
+
+ return nullptr;
+}
+
+template <typename IterTy>
+static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
+ const Query &Q, unsigned MaxRecurse) {
+ Type *Ty = V->getType();
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ Ty = PTy->getElementType();
+ FunctionType *FTy = cast<FunctionType>(Ty);
+
+ // call undef -> undef
+ if (isa<UndefValue>(V))
+ return UndefValue::get(FTy->getReturnType());
+
+ Function *F = dyn_cast<Function>(V);
+ if (!F)
+ return nullptr;
+
+ if (F->isIntrinsic())
+ if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
+ return Ret;
+
+ if (!canConstantFoldCallTo(F))
+ return nullptr;
+
+ SmallVector<Constant *, 4> ConstantArgs;
+ ConstantArgs.reserve(ArgEnd - ArgBegin);
+ for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
+ Constant *C = dyn_cast<Constant>(*I);
+ if (!C)
+ return nullptr;
+ ConstantArgs.push_back(C);
+ }
+
+ return ConstantFoldCall(F, ConstantArgs, Q.TLI);
+}
+
+Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
+ User::op_iterator ArgEnd, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCall(V, Args.begin(), Args.end(),
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+}
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC) {
+ Value *Result;
+
+ switch (I->getOpcode()) {
+ default:
+ Result = ConstantFoldInstruction(I, DL, TLI);
+ break;
+ case Instruction::FAdd:
+ Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Add:
+ Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
+ break;
+ case Instruction::FSub:
+ Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Sub:
+ Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
+ break;
+ case Instruction::FMul:
+ Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Mul:
+ Result =
+ SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::SDiv:
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
+ break;
+ case Instruction::UDiv:
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
+ break;
+ case Instruction::FDiv:
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::SRem:
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
+ break;
+ case Instruction::URem:
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
+ break;
+ case Instruction::FRem:
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Shl:
+ Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
+ break;
+ case Instruction::LShr:
+ Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
+ AC, I);
+ break;
+ case Instruction::AShr:
+ Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
+ AC, I);
+ break;
+ case Instruction::And:
+ Result =
+ SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Or:
+ Result =
+ SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Xor:
+ Result =
+ SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::ICmp:
+ Result =
+ SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::FCmp:
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::Select:
+ Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), DL, TLI, DT, AC, I);
+ break;
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ Result = SimplifyGEPInst(Ops, DL, TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::InsertValue: {
+ InsertValueInst *IV = cast<InsertValueInst>(I);
+ Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
+ IV->getInsertedValueOperand(),
+ IV->getIndices(), DL, TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::ExtractValue: {
+ auto *EVI = cast<ExtractValueInst>(I);
+ Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
+ EVI->getIndices(), DL, TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::ExtractElement: {
+ auto *EEI = cast<ExtractElementInst>(I);
+ Result = SimplifyExtractElementInst(
+ EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::PHI:
+ Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
+ break;
+ case Instruction::Call: {
+ CallSite CS(cast<CallInst>(I));
+ Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
+ TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::Trunc:
+ Result =
+ SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
+ break;
+ }
+
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!Result && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
+ if ((KnownZero | KnownOne).isAllOnesValue())
+ Result = ConstantInt::get(I->getContext(), KnownOne);
+ }
+
+ /// If called on unreachable code, the above logic may report that the
+ /// instruction simplified to itself. Make life easier for users by
+ /// detecting that case here, returning a safe value instead.
+ return Result == I ? UndefValue::get(I->getType()) : Result;
+}
+
+/// \brief Implementation of recursive simplification through an instructions
+/// uses.
+///
+/// This is the common implementation of the recursive simplification routines.
+/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
+/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
+/// instructions to process and attempt to simplify it using
+/// InstructionSimplify.
+///
+/// This routine returns 'true' only when *it* simplifies something. The passed
+/// in simplified value does not count toward this.
+static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ AssumptionCache *AC) {
+ bool Simplified = false;
+ SmallSetVector<Instruction *, 8> Worklist;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+
+ // If we have an explicit value to collapse to, do that round of the
+ // simplification loop by hand initially.
+ if (SimpleV) {
+ for (User *U : I->users())
+ if (U != I)
+ Worklist.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
+ } else {
+ Worklist.insert(I);
+ }
+
+ // Note that we must test the size on each iteration, as the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ I = Worklist[Idx];
+
+ // See if this instruction simplifies.
+ SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC);
+ if (!SimpleV)
+ continue;
+
+ Simplified = true;
+
+ // Stash away all the uses of the old instruction so we can check them for
+ // recursive simplifications after a RAUW. This is cheaper than checking all
+ // uses of the replacement value on the recursive step in most cases.
+ for (User *U : I->users())
+ Worklist.insert(cast<Instruction>(U));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
+ }
+ return Simplified;
+}
+
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ AssumptionCache *AC) {
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC);
+}
+
+bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ AssumptionCache *AC) {
+ assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
+ assert(SimpleV && "Must provide a simplified value.");
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
+}
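
The worklist loop in replaceAndRecursivelySimplifyImpl above re-tests the worklist size on every iteration because simplifying one instruction can queue its users. Below is a minimal standalone sketch of that pattern, using toy types rather than LLVM's APIs (ToyNode, simplify, and the graph are illustrative only):

// A minimal, self-contained sketch (hypothetical types, not LLVM APIs) of the
// fixed-point worklist pattern: when a node simplifies, its users are queued,
// and the loop keeps indexing into the worklist because it may grow while
// being processed.
#include <cstdio>
#include <set>
#include <vector>

struct ToyNode {
  int Value;              // -1 means "not yet a known constant".
  std::vector<int> Users; // Indices of nodes that read this one.
};

// Pretend "simplification": a node becomes the constant 0 once visited.
// Returns true if that is a change.
static bool simplify(ToyNode &N) {
  if (N.Value == 0)
    return false;
  N.Value = 0;
  return true;
}

int main() {
  std::vector<ToyNode> Graph = {
      {-1, {1, 2}}, // node 0 feeds nodes 1 and 2
      {-1, {2}},    // node 1 feeds node 2
      {-1, {}},     // node 2 is a sink
  };

  std::vector<int> Worklist = {0};
  std::set<int> Enqueued = {0};

  // Note the size is re-tested every iteration: the worklist can grow.
  for (size_t Idx = 0; Idx != Worklist.size(); ++Idx) {
    int N = Worklist[Idx];
    if (!simplify(Graph[N]))
      continue;
    // Queue the users of the node we just changed, deduplicated.
    for (int U : Graph[N].Users)
      if (Enqueued.insert(U).second)
        Worklist.push_back(U);
  }

  std::printf("processed %zu nodes\n", Worklist.size());
  return 0;
}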
diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp
new file mode 100644
index 0000000..e3e785f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Interval.cpp
@@ -0,0 +1,58 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+ // There is a loop in this interval iff one of the predecessors of the header
+ // node lives in the interval.
+ for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+ I != E; ++I)
+ if (contains(*I))
+ return true;
+ return false;
+}
+
+
+void Interval::print(raw_ostream &OS) const {
+ OS << "-------------------------------------------------------------\n"
+ << "Interval Contents:\n";
+
+ // Print out all of the basic blocks in the interval...
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+ E = Nodes.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Predecessors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+ E = Predecessors.end(); I != E; ++I)
+ OS << **I << "\n";
+
+ OS << "Interval Successors:\n";
+ for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+ E = Successors.end(); I != E; ++I)
+ OS << **I << "\n";
+}
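
Interval::isLoop above rests on a single observation: an interval contains a back edge exactly when some predecessor of its header block is itself a member of the interval. A small self-contained sketch of that check with toy types (Block, ToyInterval, and the CFG below are hypothetical, not the llvm::Interval API):

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using Block = std::string;

struct ToyInterval {
  Block Header;
  std::set<Block> Members; // Includes the header.
};

static bool isLoop(const ToyInterval &I,
                   const std::map<Block, std::vector<Block>> &Preds) {
  auto It = Preds.find(I.Header);
  if (It == Preds.end())
    return false;
  for (const Block &P : It->second)
    if (I.Members.count(P)) // A predecessor inside the interval => back edge.
      return true;
  return false;
}

int main() {
  // entry -> header -> latch -> header (latch is inside the interval).
  std::map<Block, std::vector<Block>> Preds = {
      {"header", {"entry", "latch"}}, {"latch", {"header"}}};
  ToyInterval I{"header", {"header", "latch"}};
  std::printf("isLoop: %s\n", isLoop(I, Preds) ? "true" : "false");
  return 0;
}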
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 0000000..a0583e8
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represents the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true)
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ delete Intervals[i];
+ IntervalMap.clear();
+ Intervals.clear();
+ RootInterval = nullptr;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+ for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+ Intervals.push_back(I);
+
+ // Add mappings for all of the basic blocks in I to the IntervalPartition
+ for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+ It != End; ++It)
+ IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures. After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+ BasicBlock *Header = Int->getHeaderNode();
+ for (Interval::succ_iterator I = Int->Successors.begin(),
+ E = Int->Successors.end(); I != E; ++I)
+ getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// IntervalPartition ctor - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+ // Pass false to intervals_begin because we take ownership of its memory
+ function_interval_iterator I = intervals_begin(&F, false);
+ assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+ return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph. This takes an additional boolean parameter to
+// distinguish it from a copy constructor. Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+ : FunctionPass(ID) {
+ assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+ // Pass false to intervals_begin because we take ownership of its memory
+ interval_part_interval_iterator I = intervals_begin(IP, false);
+ assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+ addIntervalToPartition(RootInterval = *I);
+
+ ++I; // After the first one...
+
+ // Add the rest of the intervals to the partition.
+ for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+ addIntervalToPartition(*I);
+
+ // Now that we know all of the successor information, propagate this to the
+ // predecessors for each block.
+ for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+ updatePredecessors(Intervals[i]);
+}
+
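updatePredecessors above exists because interval construction only records successor blocks; predecessor lists are filled in afterwards by pushing each interval's header onto the predecessor list of whichever interval owns the successor block. A toy sketch of that second pass (ToyInterval and BlockInterval are illustrative names, not the IntervalPartition API):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct ToyInterval {
  std::string Header;
  std::vector<std::string> Successors;   // Blocks outside this interval.
  std::vector<std::string> Predecessors; // Filled by the pass below.
};

int main() {
  std::vector<ToyInterval> Parts = {
      {"A", {"C"}, {}}, // interval headed by A branches to block C
      {"C", {}, {}},    // interval headed by C
  };
  // Which interval (by index) owns each block.
  std::map<std::string, size_t> BlockInterval = {{"A", 0}, {"C", 1}};

  // Propagate successor info as predecessor info.
  for (ToyInterval &Int : Parts)
    for (const std::string &Succ : Int.Successors)
      Parts[BlockInterval[Succ]].Predecessors.push_back(Int.Header);

  for (const ToyInterval &Int : Parts)
    std::printf("interval %s has %zu predecessor(s)\n", Int.Header.c_str(),
                Int.Predecessors.size());
  return 0;
}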
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
new file mode 100644
index 0000000..9f1edd2
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -0,0 +1,95 @@
+//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \brief Compute iterated dominance frontiers using a linear time algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include <queue>
+
+using namespace llvm;
+
+void IDFCalculator::calculate(SmallVectorImpl<BasicBlock *> &PHIBlocks) {
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
+ DFI != DFE; ++DFI) {
+ DomLevels[*DFI] = DFI.getPathLength() - 1;
+ }
+ }
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (BasicBlock *BB : *DefBlocks) {
+ if (DomTreeNode *Node = DT.getNode(BB))
+ PQ.push(std::make_pair(Node, DomLevels.lookup(Node)));
+ }
+
+ SmallVector<DomTreeNode *, 32> Worklist;
+ SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+ SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+ VisitedWorklist.insert(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (auto Succ : successors(BB)) {
+ DomTreeNode *SuccNode = DT.getNode(Succ);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels.lookup(SuccNode);
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!VisitedPQ.insert(SuccNode).second)
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (useLiveIn && !LiveInBlocks->count(SuccBB))
+ continue;
+
+ PHIBlocks.emplace_back(SuccBB);
+ if (!DefBlocks->count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (auto DomChild : *Node) {
+ if (VisitedWorklist.insert(DomChild).second)
+ Worklist.push_back(DomChild);
+ }
+ }
+ }
+}
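
The IDF walk above keys its priority queue on dominator-tree level so that the deepest nodes are handled first, which is what the less_second comparator paired with std::priority_queue achieves. A compact sketch of just that queue discipline with toy work items (the node names and LessSecond below are illustrative, not the IDFCalculator API):

#include <cstdio>
#include <queue>
#include <string>
#include <utility>
#include <vector>

using WorkItem = std::pair<std::string, unsigned>; // (node, dom-tree level)

struct LessSecond {
  bool operator()(const WorkItem &A, const WorkItem &B) const {
    return A.second < B.second; // max-heap on the level
  }
};

int main() {
  std::priority_queue<WorkItem, std::vector<WorkItem>, LessSecond> PQ;
  PQ.push({"entry", 0});
  PQ.push({"loop.body", 2});
  PQ.push({"loop.header", 1});

  // Pops loop.body, then loop.header, then entry: deepest nodes first,
  // i.e. the walk proceeds from the bottom of the dominator tree upwards.
  while (!PQ.empty()) {
    WorkItem Item = PQ.top();
    PQ.pop();
    std::printf("visit %s at level %u\n", Item.first.c_str(), Item.second);
  }
  return 0;
}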
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
new file mode 100644
index 0000000..0f0f31e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -0,0 +1,727 @@
+//===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lcg"
+
+static void findCallees(
+ SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited,
+ SmallVectorImpl<PointerUnion<Function *, LazyCallGraph::Node *>> &Callees,
+ DenseMap<Function *, size_t> &CalleeIndexMap) {
+ while (!Worklist.empty()) {
+ Constant *C = Worklist.pop_back_val();
+
+ if (Function *F = dyn_cast<Function>(C)) {
+ // Note that we consider *any* function with a definition to be a viable
+ // edge. Even if the function's definition is subject to replacement by
+ // some other module (say, a weak definition) there may still be
+ // optimizations which essentially speculate based on the definition and
+ // a way to check that the specific definition is in fact the one being
+ // used. For example, this could be done by moving the weak definition to
+ // a strong (internal) definition and making the weak definition be an
+ // alias. Then a test of the address of the weak function against the new
+ // strong definition's address would be an effective way to determine the
+ // safety of optimizing a direct call edge.
+ if (!F->isDeclaration() &&
+ CalleeIndexMap.insert(std::make_pair(F, Callees.size())).second) {
+ DEBUG(dbgs() << " Added callable function: " << F->getName()
+ << "\n");
+ Callees.push_back(F);
+ }
+ continue;
+ }
+
+ for (Value *Op : C->operand_values())
+ if (Visited.insert(cast<Constant>(Op)).second)
+ Worklist.push_back(cast<Constant>(Op));
+ }
+}
+
+LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
+ : G(&G), F(F), DFSNumber(0), LowLink(0) {
+ DEBUG(dbgs() << " Adding functions called by '" << F.getName()
+ << "' to the graph.\n");
+
+ SmallVector<Constant *, 16> Worklist;
+ SmallPtrSet<Constant *, 16> Visited;
+ // Find all the potential callees in this function. First walk the
+ // instructions and add every operand which is a constant to the worklist.
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ for (Value *Op : I.operand_values())
+ if (Constant *C = dyn_cast<Constant>(Op))
+ if (Visited.insert(C).second)
+ Worklist.push_back(C);
+
+ // We've collected all the constant (and thus potentially function or
+ // function containing) operands to all of the instructions in the function.
+ // Process them (recursively) collecting every function found.
+ findCallees(Worklist, Visited, Callees, CalleeIndexMap);
+}
+
+void LazyCallGraph::Node::insertEdgeInternal(Function &Callee) {
+ if (Node *N = G->lookup(Callee))
+ return insertEdgeInternal(*N);
+
+ CalleeIndexMap.insert(std::make_pair(&Callee, Callees.size()));
+ Callees.push_back(&Callee);
+}
+
+void LazyCallGraph::Node::insertEdgeInternal(Node &CalleeN) {
+ CalleeIndexMap.insert(std::make_pair(&CalleeN.getFunction(), Callees.size()));
+ Callees.push_back(&CalleeN);
+}
+
+void LazyCallGraph::Node::removeEdgeInternal(Function &Callee) {
+ auto IndexMapI = CalleeIndexMap.find(&Callee);
+ assert(IndexMapI != CalleeIndexMap.end() &&
+ "Callee not in the callee set for this caller?");
+
+ Callees[IndexMapI->second] = nullptr;
+ CalleeIndexMap.erase(IndexMapI);
+}
+
+LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
+ DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
+ << "\n");
+ for (Function &F : M)
+ if (!F.isDeclaration() && !F.hasLocalLinkage())
+ if (EntryIndexMap.insert(std::make_pair(&F, EntryNodes.size())).second) {
+ DEBUG(dbgs() << " Adding '" << F.getName()
+ << "' to entry set of the graph.\n");
+ EntryNodes.push_back(&F);
+ }
+
+ // Now add entry nodes for functions reachable via initializers to globals.
+ SmallVector<Constant *, 16> Worklist;
+ SmallPtrSet<Constant *, 16> Visited;
+ for (GlobalVariable &GV : M.globals())
+ if (GV.hasInitializer())
+ if (Visited.insert(GV.getInitializer()).second)
+ Worklist.push_back(GV.getInitializer());
+
+ DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
+ "entry set.\n");
+ findCallees(Worklist, Visited, EntryNodes, EntryIndexMap);
+
+ for (auto &Entry : EntryNodes) {
+ assert(!Entry.isNull() &&
+ "We can't have removed edges before we finish the constructor!");
+ if (Function *F = Entry.dyn_cast<Function *>())
+ SCCEntryNodes.push_back(F);
+ else
+ SCCEntryNodes.push_back(&Entry.get<Node *>()->getFunction());
+ }
+}
+
+LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
+ : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
+ EntryNodes(std::move(G.EntryNodes)),
+ EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)),
+ SCCMap(std::move(G.SCCMap)), LeafSCCs(std::move(G.LeafSCCs)),
+ DFSStack(std::move(G.DFSStack)),
+ SCCEntryNodes(std::move(G.SCCEntryNodes)),
+ NextDFSNumber(G.NextDFSNumber) {
+ updateGraphPtrs();
+}
+
+LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
+ BPA = std::move(G.BPA);
+ NodeMap = std::move(G.NodeMap);
+ EntryNodes = std::move(G.EntryNodes);
+ EntryIndexMap = std::move(G.EntryIndexMap);
+ SCCBPA = std::move(G.SCCBPA);
+ SCCMap = std::move(G.SCCMap);
+ LeafSCCs = std::move(G.LeafSCCs);
+ DFSStack = std::move(G.DFSStack);
+ SCCEntryNodes = std::move(G.SCCEntryNodes);
+ NextDFSNumber = G.NextDFSNumber;
+ updateGraphPtrs();
+ return *this;
+}
+
+void LazyCallGraph::SCC::insert(Node &N) {
+ N.DFSNumber = N.LowLink = -1;
+ Nodes.push_back(&N);
+ G->SCCMap[&N] = this;
+}
+
+bool LazyCallGraph::SCC::isDescendantOf(const SCC &C) const {
+ // Walk up the parents of this SCC and verify that we eventually find C.
+ SmallVector<const SCC *, 4> AncestorWorklist;
+ AncestorWorklist.push_back(this);
+ do {
+ const SCC *AncestorC = AncestorWorklist.pop_back_val();
+ if (AncestorC->isChildOf(C))
+ return true;
+ for (const SCC *ParentC : AncestorC->ParentSCCs)
+ AncestorWorklist.push_back(ParentC);
+ } while (!AncestorWorklist.empty());
+
+ return false;
+}
+
+void LazyCallGraph::SCC::insertIntraSCCEdge(Node &CallerN, Node &CalleeN) {
+ // First insert it into the caller.
+ CallerN.insertEdgeInternal(CalleeN);
+
+ assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC.");
+ assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC.");
+
+ // Nothing changes about this SCC or any other.
+}
+
+void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) {
+ // First insert it into the caller.
+ CallerN.insertEdgeInternal(CalleeN);
+
+ assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC.");
+
+ SCC &CalleeC = *G->SCCMap.lookup(&CalleeN);
+ assert(&CalleeC != this && "Callee must not be in this SCC.");
+ assert(CalleeC.isDescendantOf(*this) &&
+ "Callee must be a descendant of the Caller.");
+
+ // The only change required is to add this SCC to the parent set of the
+ // callee.
+ CalleeC.ParentSCCs.insert(this);
+}
+
+SmallVector<LazyCallGraph::SCC *, 1>
+LazyCallGraph::SCC::insertIncomingEdge(Node &CallerN, Node &CalleeN) {
+ // First insert it into the caller.
+ CallerN.insertEdgeInternal(CalleeN);
+
+ assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC.");
+
+ SCC &CallerC = *G->SCCMap.lookup(&CallerN);
+ assert(&CallerC != this && "Caller must not be in this SCC.");
+ assert(CallerC.isDescendantOf(*this) &&
+ "Caller must be a descendant of the Callee.");
+
+ // The algorithm we use for merging SCCs based on the cycle introduced here
+ // is to walk the SCC inverted DAG formed by the parent SCC sets. The inverse
+ // graph has the same cycle properties as the actual DAG of the SCCs, and
+ // when forming SCCs lazily by a DFS, the bottom of the graph won't exist in
+ // many cases which should prune the search space.
+ //
+ // FIXME: We can get this pruning behavior even after the incremental SCC
+ // formation by leaving behind (conservative) DFS numberings in the nodes,
+ // and pruning the search with them. These would need to be cleverly updated
+ // during the removal of intra-SCC edges, but could be preserved
+ // conservatively.
+
+ // The set of SCCs that are connected to the caller, and thus will
+ // participate in the merged connected component.
+ SmallPtrSet<SCC *, 8> ConnectedSCCs;
+ ConnectedSCCs.insert(this);
+ ConnectedSCCs.insert(&CallerC);
+
+ // We build up a DFS stack of the parents chains.
+ SmallVector<std::pair<SCC *, SCC::parent_iterator>, 8> DFSSCCs;
+ SmallPtrSet<SCC *, 8> VisitedSCCs;
+ int ConnectedDepth = -1;
+ SCC *C = this;
+ parent_iterator I = parent_begin(), E = parent_end();
+ for (;;) {
+ while (I != E) {
+ SCC &ParentSCC = *I++;
+
+ // If we have already processed this parent SCC, skip it, and remember
+ // whether it was connected so we don't have to check the rest of the
+ // stack. This also handles when we reach a child of the 'this' SCC (the
+ // callee) which terminates the search.
+ if (ConnectedSCCs.count(&ParentSCC)) {
+ ConnectedDepth = std::max<int>(ConnectedDepth, DFSSCCs.size());
+ continue;
+ }
+ if (VisitedSCCs.count(&ParentSCC))
+ continue;
+
+ // We fully explore the depth-first space, adding nodes to the connected
+ // set only as we pop them off, so "recurse" by rotating to the parent.
+ DFSSCCs.push_back(std::make_pair(C, I));
+ C = &ParentSCC;
+ I = ParentSCC.parent_begin();
+ E = ParentSCC.parent_end();
+ }
+
+ // If we've found a connection anywhere below this point on the stack (and
+ // thus up the parent graph from the caller), the current node needs to be
+ // added to the connected set now that we've processed all of its parents.
+ if ((int)DFSSCCs.size() == ConnectedDepth) {
+ --ConnectedDepth; // We're finished with this connection.
+ ConnectedSCCs.insert(C);
+ } else {
+ // Otherwise remember that its parents don't ever connect.
+ assert(ConnectedDepth < (int)DFSSCCs.size() &&
+ "Cannot have a connected depth greater than the DFS depth!");
+ VisitedSCCs.insert(C);
+ }
+
+ if (DFSSCCs.empty())
+ break; // We've walked all the parents of the caller transitively.
+
+ // Pop off the prior node and position to unwind the depth first recursion.
+ std::tie(C, I) = DFSSCCs.pop_back_val();
+ E = C->parent_end();
+ }
+
+ // Now that we have identified all of the SCCs which need to be merged into
+ // a connected set with the inserted edge, merge all of them into this SCC.
+ // FIXME: This operation currently creates ordering stability problems
+ // because we don't use stably ordered containers for the parent SCCs or the
+ // connected SCCs.
+ unsigned NewNodeBeginIdx = Nodes.size();
+ for (SCC *C : ConnectedSCCs) {
+ if (C == this)
+ continue;
+ for (SCC *ParentC : C->ParentSCCs)
+ if (!ConnectedSCCs.count(ParentC))
+ ParentSCCs.insert(ParentC);
+ C->ParentSCCs.clear();
+
+ for (Node *N : *C) {
+ for (Node &ChildN : *N) {
+ SCC &ChildC = *G->SCCMap.lookup(&ChildN);
+ if (&ChildC != C)
+ ChildC.ParentSCCs.erase(C);
+ }
+ G->SCCMap[N] = this;
+ Nodes.push_back(N);
+ }
+ C->Nodes.clear();
+ }
+ for (auto I = Nodes.begin() + NewNodeBeginIdx, E = Nodes.end(); I != E; ++I)
+ for (Node &ChildN : **I) {
+ SCC &ChildC = *G->SCCMap.lookup(&ChildN);
+ if (&ChildC != this)
+ ChildC.ParentSCCs.insert(this);
+ }
+
+ // We return the list of SCCs which were merged so that callers can
+ // invalidate any data they have associated with those SCCs. Note that these
+ // SCCs are no longer in an interesting state (they are totally empty) but
+ // the pointers will remain stable for the life of the graph itself.
+ return SmallVector<SCC *, 1>(ConnectedSCCs.begin(), ConnectedSCCs.end());
+}
+
+void LazyCallGraph::SCC::removeInterSCCEdge(Node &CallerN, Node &CalleeN) {
+ // First remove it from the node.
+ CallerN.removeEdgeInternal(CalleeN.getFunction());
+
+ assert(G->SCCMap.lookup(&CallerN) == this &&
+ "The caller must be a member of this SCC.");
+
+ SCC &CalleeC = *G->SCCMap.lookup(&CalleeN);
+ assert(&CalleeC != this &&
+ "This API only supports the rmoval of inter-SCC edges.");
+
+ assert(std::find(G->LeafSCCs.begin(), G->LeafSCCs.end(), this) ==
+ G->LeafSCCs.end() &&
+ "Cannot have a leaf SCC caller with a different SCC callee.");
+
+ bool HasOtherCallToCalleeC = false;
+ bool HasOtherCallOutsideSCC = false;
+ for (Node *N : *this) {
+ for (Node &OtherCalleeN : *N) {
+ SCC &OtherCalleeC = *G->SCCMap.lookup(&OtherCalleeN);
+ if (&OtherCalleeC == &CalleeC) {
+ HasOtherCallToCalleeC = true;
+ break;
+ }
+ if (&OtherCalleeC != this)
+ HasOtherCallOutsideSCC = true;
+ }
+ if (HasOtherCallToCalleeC)
+ break;
+ }
+ // Because the SCCs form a DAG, deleting such an edge cannot change the set
+ // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making
+ // the caller no longer a parent of the callee. Walk the other call edges
+ // in the caller to tell.
+ if (!HasOtherCallToCalleeC) {
+ bool Removed = CalleeC.ParentSCCs.erase(this);
+ (void)Removed;
+ assert(Removed &&
+ "Did not find the caller SCC in the callee SCC's parent list!");
+
+ // It may orphan an SCC if it is the last edge reaching it, but that does
+ // not violate any invariants of the graph.
+ if (CalleeC.ParentSCCs.empty())
+ DEBUG(dbgs() << "LCG: Update removing " << CallerN.getFunction().getName()
+ << " -> " << CalleeN.getFunction().getName()
+ << " edge orphaned the callee's SCC!\n");
+ }
+
+ // It may make the Caller SCC a leaf SCC.
+ if (!HasOtherCallOutsideSCC)
+ G->LeafSCCs.push_back(this);
+}
+
+void LazyCallGraph::SCC::internalDFS(
+ SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
+ SmallVectorImpl<Node *> &PendingSCCStack, Node *N,
+ SmallVectorImpl<SCC *> &ResultSCCs) {
+ Node::iterator I = N->begin();
+ N->LowLink = N->DFSNumber = 1;
+ int NextDFSNumber = 2;
+ for (;;) {
+ assert(N->DFSNumber != 0 && "We should always assign a DFS number "
+ "before processing a node.");
+
+ // We simulate recursion by popping out of the nested loop and continuing.
+ Node::iterator E = N->end();
+ while (I != E) {
+ Node &ChildN = *I;
+ if (SCC *ChildSCC = G->SCCMap.lookup(&ChildN)) {
+ // Check if we have reached a node in the new (known connected) set of
+ // this SCC. If so, the entire stack is necessarily in that set and we
+ // can re-start.
+ if (ChildSCC == this) {
+ insert(*N);
+ while (!PendingSCCStack.empty())
+ insert(*PendingSCCStack.pop_back_val());
+ while (!DFSStack.empty())
+ insert(*DFSStack.pop_back_val().first);
+ return;
+ }
+
+ // If this child isn't currently in this SCC, no need to process it.
+ // However, we do need to remove this SCC from its SCC's parent set.
+ ChildSCC->ParentSCCs.erase(this);
+ ++I;
+ continue;
+ }
+
+ if (ChildN.DFSNumber == 0) {
+ // Mark that we should start at this child when next this node is the
+ // top of the stack. We don't start at the next child to ensure this
+ // child's lowlink is reflected.
+ DFSStack.push_back(std::make_pair(N, I));
+
+ // Continue, resetting to the child node.
+ ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
+ N = &ChildN;
+ I = ChildN.begin();
+ E = ChildN.end();
+ continue;
+ }
+
+ // Track the lowest link of the children, if any are still in the stack.
+ // Any child not on the stack will have a LowLink of -1.
+ assert(ChildN.LowLink != 0 &&
+ "Low-link must not be zero with a non-zero DFS number.");
+ if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
+ N->LowLink = ChildN.LowLink;
+ ++I;
+ }
+
+ if (N->LowLink == N->DFSNumber) {
+ ResultSCCs.push_back(G->formSCC(N, PendingSCCStack));
+ if (DFSStack.empty())
+ return;
+ } else {
+ // At this point we know that N cannot ever be an SCC root. Its low-link
+ // is not its dfs-number, and we've processed all of its children. It is
+ // just sitting here waiting until some node further down the stack gets
+ // low-link == dfs-number and pops it off as well. Move it to the pending
+ // stack which is pulled into the next SCC to be formed.
+ PendingSCCStack.push_back(N);
+
+ assert(!DFSStack.empty() && "We shouldn't have an empty stack!");
+ }
+
+ N = DFSStack.back().first;
+ I = DFSStack.back().second;
+ DFSStack.pop_back();
+ }
+}
+
+SmallVector<LazyCallGraph::SCC *, 1>
+LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) {
+ // First remove it from the node.
+ CallerN.removeEdgeInternal(CalleeN.getFunction());
+
+ // We return a list of the resulting *new* SCCs in postorder.
+ SmallVector<SCC *, 1> ResultSCCs;
+
+ // Direct recursion doesn't impact the SCC graph at all.
+ if (&CallerN == &CalleeN)
+ return ResultSCCs;
+
+ // The worklist is every node in the original SCC.
+ SmallVector<Node *, 1> Worklist;
+ Worklist.swap(Nodes);
+ for (Node *N : Worklist) {
+ // The nodes formerly in this SCC are no longer in any SCC.
+ N->DFSNumber = 0;
+ N->LowLink = 0;
+ G->SCCMap.erase(N);
+ }
+ assert(Worklist.size() > 1 && "We have to have at least two nodes to have an "
+ "edge between them that is within the SCC.");
+
+ // The callee can already reach every node in this SCC (by definition). It is
+ // the only node we know will stay inside this SCC. Everything which
+ // transitively reaches Callee will also remain in the SCC. To model this we
+ // incrementally add any chain of nodes which reaches something in the new
+ // node set to the new node set. This short-circuits one side of the Tarjan
+ // walk.
+ insert(CalleeN);
+
+ // We're going to do a full mini-Tarjan's walk using a local stack here.
+ SmallVector<std::pair<Node *, Node::iterator>, 4> DFSStack;
+ SmallVector<Node *, 4> PendingSCCStack;
+ do {
+ Node *N = Worklist.pop_back_val();
+ if (N->DFSNumber == 0)
+ internalDFS(DFSStack, PendingSCCStack, N, ResultSCCs);
+
+ assert(DFSStack.empty() && "Didn't flush the entire DFS stack!");
+ assert(PendingSCCStack.empty() && "Didn't flush all pending SCC nodes!");
+ } while (!Worklist.empty());
+
+ // Now we need to reconnect the current SCC to the graph.
+ bool IsLeafSCC = true;
+ for (Node *N : Nodes) {
+ for (Node &ChildN : *N) {
+ SCC &ChildSCC = *G->SCCMap.lookup(&ChildN);
+ if (&ChildSCC == this)
+ continue;
+ ChildSCC.ParentSCCs.insert(this);
+ IsLeafSCC = false;
+ }
+ }
+#ifndef NDEBUG
+ if (!ResultSCCs.empty())
+ assert(!IsLeafSCC && "This SCC cannot be a leaf as we have split out new "
+ "SCCs by removing this edge.");
+ if (!std::any_of(G->LeafSCCs.begin(), G->LeafSCCs.end(),
+ [&](SCC *C) { return C == this; }))
+ assert(!IsLeafSCC && "This SCC cannot be a leaf as it already had child "
+ "SCCs before we removed this edge.");
+#endif
+ // If this SCC stopped being a leaf through this edge removal, remove it from
+ // the leaf SCC list.
+ if (!IsLeafSCC && !ResultSCCs.empty())
+ G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this),
+ G->LeafSCCs.end());
+
+ // Return the new list of SCCs.
+ return ResultSCCs;
+}
+
+void LazyCallGraph::insertEdge(Node &CallerN, Function &Callee) {
+ assert(SCCMap.empty() && DFSStack.empty() &&
+ "This method cannot be called after SCCs have been formed!");
+
+ return CallerN.insertEdgeInternal(Callee);
+}
+
+void LazyCallGraph::removeEdge(Node &CallerN, Function &Callee) {
+ assert(SCCMap.empty() && DFSStack.empty() &&
+ "This method cannot be called after SCCs have been formed!");
+
+ return CallerN.removeEdgeInternal(Callee);
+}
+
+LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
+ return *new (MappedN = BPA.Allocate()) Node(*this, F);
+}
+
+void LazyCallGraph::updateGraphPtrs() {
+ // Process all nodes updating the graph pointers.
+ {
+ SmallVector<Node *, 16> Worklist;
+ for (auto &Entry : EntryNodes)
+ if (Node *EntryN = Entry.dyn_cast<Node *>())
+ Worklist.push_back(EntryN);
+
+ while (!Worklist.empty()) {
+ Node *N = Worklist.pop_back_val();
+ N->G = this;
+ for (auto &Callee : N->Callees)
+ if (!Callee.isNull())
+ if (Node *CalleeN = Callee.dyn_cast<Node *>())
+ Worklist.push_back(CalleeN);
+ }
+ }
+
+ // Process all SCCs updating the graph pointers.
+ {
+ SmallVector<SCC *, 16> Worklist(LeafSCCs.begin(), LeafSCCs.end());
+
+ while (!Worklist.empty()) {
+ SCC *C = Worklist.pop_back_val();
+ C->G = this;
+ Worklist.insert(Worklist.end(), C->ParentSCCs.begin(),
+ C->ParentSCCs.end());
+ }
+ }
+}
+
+LazyCallGraph::SCC *LazyCallGraph::formSCC(Node *RootN,
+ SmallVectorImpl<Node *> &NodeStack) {
+ // The tail of the stack is the new SCC. Allocate the SCC and pop the stack
+ // into it.
+ SCC *NewSCC = new (SCCBPA.Allocate()) SCC(*this);
+
+ while (!NodeStack.empty() && NodeStack.back()->DFSNumber > RootN->DFSNumber) {
+ assert(NodeStack.back()->LowLink >= RootN->LowLink &&
+ "We cannot have a low link in an SCC lower than its root on the "
+ "stack!");
+ NewSCC->insert(*NodeStack.pop_back_val());
+ }
+ NewSCC->insert(*RootN);
+
+ // A final pass over all edges in the SCC (this remains linear as we only
+ // do this once when we build the SCC) to connect it to the parent sets of
+ // its children.
+ bool IsLeafSCC = true;
+ for (Node *SCCN : NewSCC->Nodes)
+ for (Node &SCCChildN : *SCCN) {
+ SCC &ChildSCC = *SCCMap.lookup(&SCCChildN);
+ if (&ChildSCC == NewSCC)
+ continue;
+ ChildSCC.ParentSCCs.insert(NewSCC);
+ IsLeafSCC = false;
+ }
+
+ // For the SCCs where we find no child SCCs, add them to the leaf list.
+ if (IsLeafSCC)
+ LeafSCCs.push_back(NewSCC);
+
+ return NewSCC;
+}
+
+LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() {
+ Node *N;
+ Node::iterator I;
+ if (!DFSStack.empty()) {
+ N = DFSStack.back().first;
+ I = DFSStack.back().second;
+ DFSStack.pop_back();
+ } else {
+ // If we've handled all candidate entry nodes to the SCC forest, we're done.
+ do {
+ if (SCCEntryNodes.empty())
+ return nullptr;
+
+ N = &get(*SCCEntryNodes.pop_back_val());
+ } while (N->DFSNumber != 0);
+ I = N->begin();
+ N->LowLink = N->DFSNumber = 1;
+ NextDFSNumber = 2;
+ }
+
+ for (;;) {
+ assert(N->DFSNumber != 0 && "We should always assign a DFS number "
+ "before placing a node onto the stack.");
+
+ Node::iterator E = N->end();
+ while (I != E) {
+ Node &ChildN = *I;
+ if (ChildN.DFSNumber == 0) {
+ // Mark that we should start at this child when next this node is the
+ // top of the stack. We don't start at the next child to ensure this
+ // child's lowlink is reflected.
+ DFSStack.push_back(std::make_pair(N, N->begin()));
+
+ // Recurse onto this node via a tail call.
+ assert(!SCCMap.count(&ChildN) &&
+ "Found a node with 0 DFS number but already in an SCC!");
+ ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
+ N = &ChildN;
+ I = ChildN.begin();
+ E = ChildN.end();
+ continue;
+ }
+
+ // Track the lowest link of the children, if any are still in the stack.
+ assert(ChildN.LowLink != 0 &&
+ "Low-link must not be zero with a non-zero DFS number.");
+ if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
+ N->LowLink = ChildN.LowLink;
+ ++I;
+ }
+
+ if (N->LowLink == N->DFSNumber)
+ // Form the new SCC out of the top of the DFS stack.
+ return formSCC(N, PendingSCCStack);
+
+ // At this point we know that N cannot ever be an SCC root. Its low-link
+ // is not its dfs-number, and we've processed all of its children. It is
+ // just sitting here waiting until some node further down the stack gets
+ // low-link == dfs-number and pops it off as well. Move it to the pending
+ // stack which is pulled into the next SCC to be formed.
+ PendingSCCStack.push_back(N);
+
+ assert(!DFSStack.empty() && "We never found a viable root!");
+ N = DFSStack.back().first;
+ I = DFSStack.back().second;
+ DFSStack.pop_back();
+ }
+}
+
+char LazyCallGraphAnalysis::PassID;
+
+LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
+ SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) {
+ // Recurse depth first through the nodes.
+ for (LazyCallGraph::Node &ChildN : N)
+ if (Printed.insert(&ChildN).second)
+ printNodes(OS, ChildN, Printed);
+
+ OS << " Call edges in function: " << N.getFunction().getName() << "\n";
+ for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I)
+ OS << " -> " << I->getFunction().getName() << "\n";
+
+ OS << "\n";
+}
+
+static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) {
+ ptrdiff_t SCCSize = std::distance(SCC.begin(), SCC.end());
+ OS << " SCC with " << SCCSize << " functions:\n";
+
+ for (LazyCallGraph::Node *N : SCC)
+ OS << " " << N->getFunction().getName() << "\n";
+
+ OS << "\n";
+}
+
+PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M,
+ ModuleAnalysisManager *AM) {
+ LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
+
+ OS << "Printing the call graph for module: " << M.getModuleIdentifier()
+ << "\n\n";
+
+ SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
+ for (LazyCallGraph::Node &N : G)
+ if (Printed.insert(&N).second)
+ printNodes(OS, N, Printed);
+
+ for (LazyCallGraph::SCC &SCC : G.postorder_sccs())
+ printSCC(OS, SCC);
+
+ return PreservedAnalyses::all();
+}
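
getNextSCCInPostOrder and internalDFS above run Tarjan's SCC algorithm without recursion: an explicit DFS stack of (node, child-iterator) pairs stands in for the call stack, a pending stack holds nodes awaiting SCC assignment, and an SCC is formed whenever a node's low-link equals its DFS number. A self-contained sketch of that iterative pattern on a toy adjacency-list graph (none of the names below are the LazyCallGraph API); the pending stack here plays the role of PendingSCCStack in the code above:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Frame {
  int Node;
  size_t NextChild;
};

int main() {
  // 0 -> 1 -> 2 -> 0 forms one SCC; node 3 is on its own.
  std::vector<std::vector<int>> Adj = {{1}, {2}, {0, 3}, {}};
  const int N = static_cast<int>(Adj.size());

  std::vector<int> DFSNum(N, 0), LowLink(N, 0);
  std::vector<bool> Pending(N, false);
  std::vector<int> PendingStack;
  std::vector<Frame> DFSStack;
  int NextDFSNumber = 1;

  for (int Root = 0; Root < N; ++Root) {
    if (DFSNum[Root] != 0)
      continue;
    DFSNum[Root] = LowLink[Root] = NextDFSNumber++;
    PendingStack.push_back(Root);
    Pending[Root] = true;
    DFSStack.push_back({Root, 0});

    while (!DFSStack.empty()) {
      Frame &F = DFSStack.back();
      int V = F.Node;
      if (F.NextChild < Adj[V].size()) {
        int W = Adj[V][F.NextChild++];
        if (DFSNum[W] == 0) {
          // "Recurse" by pushing a new frame for the unvisited child.
          DFSNum[W] = LowLink[W] = NextDFSNumber++;
          PendingStack.push_back(W);
          Pending[W] = true;
          DFSStack.push_back({W, 0});
        } else if (Pending[W]) {
          LowLink[V] = std::min(LowLink[V], DFSNum[W]);
        }
        continue;
      }

      // All children of V processed: unwind one frame.
      DFSStack.pop_back();
      if (!DFSStack.empty()) {
        int Parent = DFSStack.back().Node;
        LowLink[Parent] = std::min(LowLink[Parent], LowLink[V]);
      }
      if (LowLink[V] == DFSNum[V]) {
        // V is an SCC root: pop the pending stack down to and including V.
        std::printf("SCC:");
        int W;
        do {
          W = PendingStack.back();
          PendingStack.pop_back();
          Pending[W] = false;
          std::printf(" %d", W);
        } while (W != V);
        std::printf("\n");
      }
    }
  }
  return 0;
}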
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 0000000..0d1d34e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,1425 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <stack>
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "lazy-value-info"
+
+char LazyValueInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// This is the information tracked by LazyValueInfo for each value.
+///
+/// FIXME: This is basically just for bringup; it can be made a lot richer
+/// in the future.
+///
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// This Value has no known value yet.
+ undefined,
+
+ /// This Value has a specific constant value.
+ constant,
+
+ /// This Value is known to not have the specified value.
+ notconstant,
+
+ /// The Value falls within this range.
+ constantrange,
+
+ /// This value is not known to be constant, and we know that it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
+
+public:
+ LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ if (!isa<UndefValue>(C))
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
+ return Res;
+ }
+ static LVILatticeVal getOverdefined() {
+ LVILatticeVal Res;
+ Res.markOverdefined();
+ return Res;
+ }
+
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val;
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
+ }
+
+ /// Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Tag = overdefined;
+ return true;
+ }
+
+ /// Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() == V) &&
+ "Marking constant with different value");
+ assert(isUndefined());
+ Tag = constant;
+ Val = V;
+ return true;
+ }
+
+ /// Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (isa<UndefValue>(V))
+ return false;
+
+ assert((!isConstant() || getConstant() != V) &&
+ "Marking constant !constant with same value");
+ assert((!isNotConstant() || getNotConstant() == V) &&
+ "Marking !constant with different value");
+ assert(isUndefined() || isConstant());
+ Tag = notconstant;
+ Val = V;
+ return true;
+ }
+
+ /// Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ bool changed = Range != NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
+ return true;
+ }
+
+ /// Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (isUndefined()) {
+ Tag = RHS.Tag;
+ Val = RHS.Val;
+ Range = RHS.Range;
+ return true;
+ }
+
+ if (isConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
+ if (Res->isOne())
+ return markNotConstant(RHS.getNotConstant());
+
+ return markOverdefined();
+ }
+
+ // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+ // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+ // a function. The correct result is to pick up RHS.
+
+ return markOverdefined();
+ }
+
+ if (isNotConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return markOverdefined();
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
+ if (Res->isOne())
+ return false;
+
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ return markOverdefined();
+ }
+
+ assert(isConstantRange() && "New LVILattice type?");
+ if (!RHS.isConstantRange())
+ return markOverdefined();
+
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ return markConstantRange(NewR);
+ }
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
+ LLVM_ATTRIBUTE_USED;
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
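
LVILatticeVal::mergeIn above follows the usual lattice-join rules: undefined yields to whatever the other side holds, two different constants collapse to overdefined, and overdefined absorbs everything, with the return value reporting whether the receiving value changed. A reduced sketch of those rules with a toy three-state lattice (ToyLattice is illustrative only and ignores the not-constant and constant-range cases):

#include <cstdio>

struct ToyLattice {
  enum Kind { Undefined, Constant, Overdefined } Tag = Undefined;
  int Val = 0; // Only meaningful when Tag == Constant.

  bool markOverdefined() {
    if (Tag == Overdefined)
      return false;
    Tag = Overdefined;
    return true;
  }

  // Merge RHS into this value; return true if this value changed.
  bool mergeIn(const ToyLattice &RHS) {
    if (RHS.Tag == Undefined || Tag == Overdefined)
      return false;             // Nothing new to learn.
    if (RHS.Tag == Overdefined)
      return markOverdefined(); // The other side knows nothing precise.
    if (Tag == Undefined) {
      Tag = Constant;
      Val = RHS.Val;
      return true;              // Adopt the constant.
    }
    // Both sides are constants: equal stays put, unequal goes to the top.
    return Val == RHS.Val ? false : markOverdefined();
  }
};

int main() {
  ToyLattice A; // undefined
  ToyLattice B;
  B.Tag = ToyLattice::Constant;
  B.Val = 7;
  ToyLattice C;
  C.Tag = ToyLattice::Constant;
  C.Val = 9;

  A.mergeIn(B);                // A becomes the constant 7.
  bool Changed = A.mergeIn(C); // 7 vs 9: collapse to overdefined.
  std::printf("changed=%d tag=%d\n", Changed, (int)A.Tag);
  return 0;
}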
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// A callback value handle updates the cache when values are erased.
+ class LazyValueInfoCache;
+ struct LVIValueHandle final : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted() override;
+ void allUsesReplacedWith(Value *V) override {
+ deleted();
+ }
+ };
+}
+
+namespace {
+ /// This is the cache kept by LazyValueInfo which
+ /// maintains information about queries across the clients' queries.
+ class LazyValueInfoCache {
+ /// This is all of the cached block information for exactly one Value*.
+ /// The entries are keyed by the BasicBlock* they describe.
+ /// Over-defined lattice values are recorded in OverDefinedCache to reduce
+ /// memory overhead.
+ typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4>
+ ValueCacheEntryTy;
+
+ /// This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// This tracks, on a per-block basis, the set of values that are
+ /// over-defined at the end of that block.
+ typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>>
+ OverDefinedCacheTy;
+ OverDefinedCacheTy OverDefinedCache;
+
+ /// Keep track of all blocks that we have ever seen, so we
+ /// don't spend time removing unused blocks from our caches.
+ DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+
+ /// This stack holds the state of the value solver during a query.
+ /// It basically emulates the callstack of the naive
+ /// recursive value lookup process.
+ std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ /// Keeps track of which block-value pairs are in BlockValueStack.
+ DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet;
+
+ /// Push BV onto BlockValueStack unless it's already in there.
+ /// Returns true on success.
+ bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) {
+ if (!BlockValueSet.insert(BV).second)
+ return false; // It's already in the stack.
+
+ BlockValueStack.push(BV);
+ return true;
+ }
+
+ AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls.
+ const DataLayout &DL; ///< A mandatory DataLayout
+ DominatorTree *DT; ///< An optional DT pointer.
+
+ friend struct LVIValueHandle;
+
+ void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ SeenBlocks.insert(BB);
+
+ // Insert over-defined values into their own cache to reduce memory
+ // overhead.
+ if (Result.isOverdefined())
+ OverDefinedCache[BB].insert(Val);
+ else
+ lookup(Val)[BB] = Result;
+ }
+
+ LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+ LVILatticeVal &Result,
+ Instruction *CxtI = nullptr);
+ bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+ // These methods process one work item and may add more. A false value
+ // returned means that the work item was not completely processed and must
+ // be revisited after going through the new items.
+ bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB);
+ bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB);
+ bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI, BasicBlock *BB);
+ void mergeAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV,
+ Instruction *BBI);
+
+ void solve();
+
+ ValueCacheEntryTy &lookup(Value *V) {
+ return ValueCache[LVIValueHandle(V, this)];
+ }
+
+ bool isOverdefined(Value *V, BasicBlock *BB) const {
+ auto ODI = OverDefinedCache.find(BB);
+
+ if (ODI == OverDefinedCache.end())
+ return false;
+
+ return ODI->second.count(V);
+ }
+
+ bool hasCachedValueInfo(Value *V, BasicBlock *BB) {
+ if (isOverdefined(V, BB))
+ return true;
+
+ LVIValueHandle ValHandle(V, this);
+ auto I = ValueCache.find(ValHandle);
+ if (I == ValueCache.end())
+ return false;
+
+ return I->second.count(BB);
+ }
+
+ LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) {
+ if (isOverdefined(V, BB))
+ return LVILatticeVal::getOverdefined();
+
+ return lookup(V)[BB];
+ }
+
+ public:
+ /// This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI = nullptr);
+
+ /// This is the query interface to determine the lattice
+ /// value for the specified Value* at the specified instruction (generally
+ /// from an assume intrinsic).
+ LVILatticeVal getValueAt(Value *V, Instruction *CxtI);
+
+ /// This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
+ Instruction *CxtI = nullptr);
+
+ /// This is the update interface to inform the cache that an edge from
+ /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ /// This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ SeenBlocks.clear();
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+
+ LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
+ DominatorTree *DT = nullptr)
+ : AC(AC), DL(DL), DT(DT) {}
+ };
+} // end anonymous namespace
+
+void LVIValueHandle::deleted() {
+ SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
+ for (auto &I : Parent->OverDefinedCache) {
+ SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ if (ValueSet.count(getValPtr()))
+ ValueSet.erase(getValPtr());
+ if (ValueSet.empty())
+ ToErase.push_back(I.first);
+ }
+ for (auto &BB : ToErase)
+ Parent->OverDefinedCache.erase(BB);
+
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
+ auto ODI = OverDefinedCache.find(BB);
+ if (ODI != OverDefinedCache.end())
+ OverDefinedCache.erase(ODI);
+
+ for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+void LazyValueInfoCache::solve() {
+ while (!BlockValueStack.empty()) {
+ std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
+
+ if (solveBlockValue(e.second, e.first)) {
+ // The work item was completely processed.
+ assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
+ assert(hasCachedValueInfo(e.second, e.first) &&
+ "Result should be in cache!");
+
+ BlockValueStack.pop();
+ BlockValueSet.erase(e);
+ } else {
+ // More work needs to be done before revisiting.
+ assert(BlockValueStack.top() != e && "Stack should have been pushed!");
+ }
+ }
+}
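+
+// An illustrative trace of the worklist (sketch): a query for %x at the end
+// of BB2 pushes (BB2, %x). If solving it needs the not-yet-cached value of
+// %x in a predecessor BB1, then (BB1, %x) is pushed and solveBlockValue
+// returns false, leaving the stack as [(BB2, %x), (BB1, %x)]. (BB1, %x) is
+// solved and popped first; the revisit of (BB2, %x) then finds its
+// dependency in the cache and completes.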
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (isa<Constant>(Val))
+ return true;
+
+ return hasCachedValueInfo(Val, BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val))
+ return LVILatticeVal::get(VC);
+
+ SeenBlocks.insert(BB);
+ return getCachedValueInfo(Val, BB);
+}
+
+static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
+ switch (BBI->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+ if (isa<IntegerType>(BBI->getType())) {
+ ConstantRange Result = getConstantRangeFromMetadata(*Ranges);
+ return LVILatticeVal::getRange(Result);
+ }
+ break;
+  }
+ // Nothing known - Note that we do not want overdefined here. We may know
+ // something else about the value and not having range metadata shouldn't
+ // cause us to throw away those facts.
+ return LVILatticeVal();
+}
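+
+// For example, a load carrying range metadata:
+//   %v = load i32, i32* %p, !range !0
+//   ...
+//   !0 = !{i32 0, i32 10}
+// yields constantrange<0, 10> here, while a load without !range yields the
+// undefined lattice value, so any other facts known about %v are preserved.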
+
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+ if (isa<Constant>(Val))
+ return true;
+
+ if (hasCachedValueInfo(Val, BB)) {
+ // If we have a cached value, use that.
+ DEBUG(dbgs() << " reuse BB '" << BB->getName()
+ << "' val=" << getCachedValueInfo(Val, BB) << '\n');
+
+ // Since we're reusing a cached value, we don't need to update the
+ // OverDefinedCache. The cache will have been properly updated whenever the
+ // cached value was inserted.
+ return true;
+ }
+
+ // Hold off inserting this value into the Cache in case we have to return
+ // false and come back later.
+ LVILatticeVal Res;
+
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (!BBI || BBI->getParent() != BB) {
+ if (!solveBlockValueNonLocal(Res, Val, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ if (!solveBlockValuePHINode(Res, PN, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
+ }
+
+  // If this value is a nonnull pointer, record its range and bail out.
+ PointerType *PT = dyn_cast<PointerType>(BBI->getType());
+ if (PT && isKnownNonNull(BBI)) {
+ Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
+ insertResult(Val, BB, Res);
+ return true;
+ }
+
+  // If this is an instruction which supports range metadata, merge in the
+  // implied range. TODO: This should be an intersection, not a union.
+ Res.mergeIn(getFromRangeMetadata(BBI), DL);
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
+ LVILatticeVal Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Res.markOverdefined();
+ insertResult(Val, BB, Res);
+ return true;
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ Res.markOverdefined();
+ insertResult(Val, BB, Res);
+ return true;
+ }
+
+ if (!solveBlockValueConstantRange(Res, BBI, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
+}
+
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ return L->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(L->getPointerOperand(),
+ L->getModule()->getDataLayout()) == Ptr;
+ }
+ if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ return S->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(S->getPointerOperand(),
+ S->getModule()->getDataLayout()) == Ptr;
+ }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return false;
+
+ // FIXME: check whether it has a valuerange that excludes zero?
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Len || Len->isZero()) return false;
+
+ if (MI->getDestAddressSpace() == 0)
+ if (GetUnderlyingObject(MI->getRawDest(),
+ MI->getModule()->getDataLayout()) == Ptr)
+ return true;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ if (MTI->getSourceAddressSpace() == 0)
+ if (GetUnderlyingObject(MTI->getRawSource(),
+ MTI->getModule()->getDataLayout()) == Ptr)
+ return true;
+ }
+ return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ if (isKnownNonNull(Val)) {
+ NotNull = true;
+ } else {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ Value *UnderlyingVal = GetUnderlyingObject(Val, DL);
+ // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
+ // inside InstructionDereferencesPointer either.
+ if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1)) {
+ for (Instruction &I : *BB) {
+ if (InstructionDereferencesPointer(&I, UnderlyingVal)) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ if (NotNull) {
+ PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ } else {
+ Result.markOverdefined();
+ }
+ BBLV = Result;
+ return true;
+ }
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult, DL);
+
+    // If we hit overdefined, exit early: the merged result can get no more
+    // conservative.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PhiBB = PN->getIncomingBlock(i);
+ Value *PhiVal = PN->getIncomingValue(i);
+ LVILatticeVal EdgeResult;
+ // Note that we can provide PN as the context value to getEdgeValue, even
+ // though the results will be cached, because PN is the value being used as
+ // the cache key in the caller.
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult, DL);
+
+    // If we hit overdefined, exit early: the merged result can get no more
+    // conservative.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+ BBLV = Result;
+ return true;
+}
+
+static bool getValueFromCondition(Value *Val, ICmpInst *ICI,
+                                  LVILatticeVal &Result,
+                                  bool isTrueDest = true);
+
+// If we can determine a constant range for the value Val in the context
+// provided by the instruction BBI, then merge it into BBLV.
+void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
+ LVILatticeVal &BBLV,
+ Instruction *BBI) {
+ BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
+ if (!BBI)
+ return;
+
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *I = cast<CallInst>(AssumeVH);
+ if (!isValidAssumeForContext(I, BBI, DT))
+ continue;
+
+ Value *C = I->getArgOperand(0);
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) {
+ LVILatticeVal Result;
+      if (getValueFromCondition(Val, ICI, Result)) {
+ if (BBLV.isOverdefined())
+ BBLV = Result;
+ else
+ BBLV.mergeIn(Result, DL);
+ }
+ }
+ }
+}
+
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI,
+ BasicBlock *BB) {
+ // Figure out the range of the LHS. If that fails, bail.
+ if (!hasBlockValue(BBI->getOperand(0), BB)) {
+ if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ return false;
+ BBLV.markOverdefined();
+ return true;
+ }
+
+ LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ mergeAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ if (!LHSVal.isConstantRange()) {
+ BBLV.markOverdefined();
+ return true;
+ }
+
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+  ConstantRange RHSRange(1); // Placeholder; set below for binary operators.
+ IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+ RHSRange = ConstantRange(RHS->getValue());
+ } else {
+ BBLV.markOverdefined();
+ return true;
+ }
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically. Enhancing that set will allow us to analyze
+ // more definitions.
+ LVILatticeVal Result;
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+ case Instruction::And:
+ Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+ break;
+ case Instruction::Or:
+ Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ BBLV = Result;
+ return true;
+}
+
+bool getValueFromCondition(Value *Val, ICmpInst *ICI,
+                           LVILatticeVal &Result, bool isTrueDest) {
+ if (ICI && isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ // Recognize the range checking idiom that InstCombine produces.
+ // (X-C1) u< C2 --> [C1, C1+C2)
+ ConstantInt *NegOffset = nullptr;
+ if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
+ match(ICI->getOperand(0), m_Add(m_Specific(Val),
+ m_ConstantInt(NegOffset)));
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
+ if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
+ // Calculate the range of values that are allowed by the comparison
+ ConstantRange CmpRange(CI->getValue());
+ ConstantRange TrueValues =
+ ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
+
+ if (NegOffset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(NegOffset->getValue());
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+ }
+
+ return false;
+}
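+
+// A worked example of the range-check idiom above (illustrative):
+//   %a = add i32 %x, -5
+//   %c = icmp ult i32 %a, 10
+// On the true edge, the allowed region for %a is [0, 10); subtracting the
+// offset -5 yields %x in constantrange<5, 15>.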
+
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
+/// Val is not constrained on the edge.
+static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+ // If this is a conditional branch and only one successor goes to BBTo, then
+ // we may be able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val) {
+ Result = LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+ return true;
+ }
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+      if (getValueFromCondition(Val, ICI, Result, isTrueDest))
+ return true;
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+ if (SI->getCondition() != Val)
+ return false;
+
+ bool DefaultCase = SI->getDefaultDest() == BBTo;
+ unsigned BitWidth = Val->getType()->getIntegerBitWidth();
+ ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
+
+ for (SwitchInst::CaseIt i : SI->cases()) {
+ ConstantRange EdgeVal(i.getCaseValue()->getValue());
+ if (DefaultCase) {
+ // It is possible that the default destination is the destination of
+ // some cases. There is no need to perform difference for those cases.
+ if (i.getCaseSuccessor() != BBTo)
+ EdgesVals = EdgesVals.difference(EdgeVal);
+ } else if (i.getCaseSuccessor() == BBTo)
+ EdgesVals = EdgesVals.unionWith(EdgeVal);
+ }
+ Result = LVILatticeVal::getRange(EdgesVals);
+ return true;
+ }
+ return false;
+}
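+
+// For example, given the switch (illustrative):
+//   switch i32 %v, label %default [ i32 1, label %case
+//                                   i32 3, label %case ]
+// the edge to %case carries the union of {1} and {3}, which ConstantRange
+// widens to constantrange<1, 4>; the edge to %default carries the full
+// range with the case values subtracted (conservatively, since only a
+// single contiguous or wrapped range can be represented).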
+
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
+/// the basic block if the edge does not constrain Val.
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result,
+ Instruction *CxtI) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val)) {
+ Result = LVILatticeVal::get(VC);
+ return true;
+ }
+
+ if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) {
+ if (!Result.isConstantRange() ||
+ Result.getConstantRange().getSingleElement())
+ return true;
+
+ // FIXME: this check should be moved to the beginning of the function when
+ // LVI better supports recursive values. Even for the single value case, we
+ // can intersect to detect dead code (an empty range).
+ if (!hasBlockValue(Val, BBFrom)) {
+ if (pushBlockValue(std::make_pair(BBFrom, Val)))
+ return false;
+ Result.markOverdefined();
+ return true;
+ }
+
+ // Try to intersect ranges of the BB and the constraint on the edge.
+ LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ mergeAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator());
+ // See note on the use of the CxtI with mergeAssumeBlockValueConstantRange,
+ // and caching, below.
+ mergeAssumeBlockValueConstantRange(Val, InBlock, CxtI);
+ if (!InBlock.isConstantRange())
+ return true;
+
+ ConstantRange Range =
+ Result.getConstantRange().intersectWith(InBlock.getConstantRange());
+ Result = LVILatticeVal::getRange(Range);
+ return true;
+ }
+
+ if (!hasBlockValue(Val, BBFrom)) {
+ if (pushBlockValue(std::make_pair(BBFrom, Val)))
+ return false;
+ Result.markOverdefined();
+ return true;
+ }
+
+ // If we couldn't compute the value on the edge, use the value from the BB.
+ Result = getBlockValue(Val, BBFrom);
+ mergeAssumeBlockValueConstantRange(Val, Result, BBFrom->getTerminator());
+ // We can use the context instruction (generically the ultimate instruction
+ // the calling pass is trying to simplify) here, even though the result of
+ // this function is generally cached when called from the solve* functions
+ // (and that cached result might be used with queries using a different
+ // context instruction), because when this function is called from the solve*
+ // functions, the context instruction is not provided. When called from
+ // LazyValueInfoCache::getValueOnEdge, the context instruction is provided,
+ // but then the result is not cached.
+ mergeAssumeBlockValueConstantRange(Val, Result, CxtI);
+ return true;
+}
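+
+// For example (illustrative): if the edge constrains %x to [0, 10) and the
+// value of %x at the end of BBFrom is [5, 20), the intersection above
+// yields constantrange<5, 10>.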
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
+ DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ assert(BlockValueStack.empty() && BlockValueSet.empty());
+ pushBlockValue(std::make_pair(BB, V));
+
+ solve();
+ LVILatticeVal Result = getBlockValue(V, BB);
+ mergeAssumeBlockValueConstantRange(V, Result, CxtI);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
+ DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
+ << CxtI->getName() << "'\n");
+
+ LVILatticeVal Result;
+ if (auto *I = dyn_cast<Instruction>(V))
+ Result = getFromRangeMetadata(I);
+ mergeAssumeBlockValueConstantRange(V, Result, CxtI);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
+ Instruction *CxtI) {
+ DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+ LVILatticeVal Result;
+ if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
+ solve();
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI);
+ (void)WasFastQuery;
+ assert(WasFastQuery && "More work to do after problem solved?");
+ }
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be
+ // possible to solve now. We do NOT try to proactively update these values.
+ // Instead, we clear their entries from the cache, and allow lazy updating to
+ // recompute them when needed.
+
+ // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ auto I = OverDefinedCache.find(OldSucc);
+ if (I == OverDefinedCache.end())
+ return; // Nothing to process here.
+ SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (Value *V : ValsToClear) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ auto OI = OverDefinedCache.find(ToUpdate);
+ if (OI == OverDefinedCache.end())
+ continue;
+ SmallPtrSetImpl<Value *> &ValueSet = OI->second;
+ if (!ValueSet.count(V))
+ continue;
+
+ ValueSet.erase(V);
+ if (ValueSet.empty())
+ OverDefinedCache.erase(OI);
+
+ // If we removed anything, then we potentially need to update
+ // blocks successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
+ const DataLayout *DL,
+ DominatorTree *DT = nullptr) {
+ if (!PImpl) {
+ assert(DL && "getCache() called with a null DataLayout");
+ PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ }
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ if (PImpl)
+ getCache(PImpl, AC, &DL, DT).clear();
+
+ // Fully lazy.
+ return false;
+}
+
+void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl, AC, nullptr);
+ PImpl = nullptr;
+ }
+}
+
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ LVILatticeVal Result =
+ getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return nullptr;
+}
+
+/// Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB,
+ Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
+ LVILatticeVal Result =
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
+ return nullptr;
+}
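+
+// A typical client-side use, as in JumpThreading or
+// CorrelatedValuePropagation (sketch; U is a use of V reachable only via
+// the FromBB -> ToBB edge):
+//   if (Constant *C = LVI->getConstantOnEdge(V, FromBB, ToBB, CxtI))
+//     U.set(C); // Fold the use along this edge.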
+
+static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
+ LVILatticeVal &Result,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = nullptr;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
+ TLI);
+ if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+ return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
+ return LazyValueInfo::Unknown;
+ }
+
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return LazyValueInfo::Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return LazyValueInfo::False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return LazyValueInfo::True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return LazyValueInfo::True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return LazyValueInfo::False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange TrueValues =
+ ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
+ return LazyValueInfo::True;
+ if (TrueValues.inverse().contains(CR))
+ return LazyValueInfo::False;
+ return LazyValueInfo::Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, DL,
+ TLI);
+ if (Res->isNullValue())
+ return LazyValueInfo::False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, DL,
+ TLI);
+ if (Res->isNullValue())
+ return LazyValueInfo::True;
+ }
+ return LazyValueInfo::Unknown;
+ }
+
+ return LazyValueInfo::Unknown;
+}
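+
+// For example (illustrative): with Result = constantrange<0, 10>, the query
+// "ult 10" returns True, because [0, 10) lies entirely inside the region
+// where the predicate holds; "eq 3" returns Unknown, because the range
+// contains 3 but is not the single element 3.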
+
+/// Determine whether the specified value comparison with a constant is known to
+/// be true or false on the specified CFG edge. Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB,
+ Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
+ LVILatticeVal Result =
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
+ return getPredicateResult(Pred, C, Result, DL, TLI);
+}
+
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
+ Instruction *CxtI) {
+ const DataLayout &DL = CxtI->getModule()->getDataLayout();
+ LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
+ Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
+ if (Ret != Unknown)
+ return Ret;
+
+ // Note: The following bit of code is somewhat distinct from the rest of LVI;
+ // LVI as a whole tries to compute a lattice value which is conservatively
+ // correct at a given location. In this case, we have a predicate which we
+ // weren't able to prove about the merged result, and we're pushing that
+ // predicate back along each incoming edge to see if we can prove it
+ // separately for each input. As a motivating example, consider:
+ // bb1:
+ // %v1 = ... ; constantrange<1, 5>
+ // br label %merge
+ // bb2:
+ // %v2 = ... ; constantrange<10, 20>
+ // br label %merge
+ // merge:
+ // %phi = phi [%v1, %v2] ; constantrange<1,20>
+ // %pred = icmp eq i32 %phi, 8
+ // We can't tell from the lattice value for '%phi' that '%pred' is false
+ // along each path, but by checking the predicate over each input separately,
+ // we can.
+ // We limit the search to one step backwards from the current BB and value.
+ // We could consider extending this to search further backwards through the
+ // CFG and/or value graph, but there are non-obvious compile time vs quality
+ // tradeoffs.
+ if (CxtI) {
+ BasicBlock *BB = CxtI->getParent();
+
+ // Function entry or an unreachable block. Bail to avoid confusing
+ // analysis below.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE)
+ return Unknown;
+
+ // If V is a PHI node in the same block as the context, we need to ask
+ // questions about the predicate as applied to the incoming value along
+ // each edge. This is useful for eliminating cases where the predicate is
+ // known along all incoming edges.
+ if (auto *PHI = dyn_cast<PHINode>(V))
+ if (PHI->getParent() == BB) {
+ Tristate Baseline = Unknown;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
+ Value *Incoming = PHI->getIncomingValue(i);
+ BasicBlock *PredBB = PHI->getIncomingBlock(i);
+ // Note that PredBB may be BB itself.
+ Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB,
+ CxtI);
+
+ // Keep going as long as we've seen a consistent known result for
+ // all inputs.
+ Baseline = (i == 0) ? Result /* First iteration */
+ : (Baseline == Result ? Baseline : Unknown); /* All others */
+ if (Baseline == Unknown)
+ break;
+ }
+ if (Baseline != Unknown)
+ return Baseline;
+ }
+
+    // For a comparison where V is defined outside this block, it's possible
+ // that we've branched on it before. Look to see if the value is known
+ // on all incoming edges.
+ if (!isa<Instruction>(V) ||
+ cast<Instruction>(V)->getParent() != BB) {
+      // For each predecessor edge, determine whether the comparison is true
+      // or false on that edge. If they're all true or all false, we can
+      // conclude the value of the comparison in this block.
+ Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Baseline != Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ Tristate Ret = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Ret != Baseline) break;
+ }
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ return Baseline;
+ }
+ }
+ }
+ }
+ return Unknown;
+}
+
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ if (PImpl) {
+ const DataLayout &DL = PredBB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ }
+}
+
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
new file mode 100644
index 0000000..2dfb09c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -0,0 +1,720 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, for two reasons. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Lint.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ namespace MemRef {
+ static const unsigned Read = 1;
+ static const unsigned Write = 2;
+ static const unsigned Callee = 4;
+ static const unsigned Branchee = 8;
+ }
+
+ class Lint : public FunctionPass, public InstVisitor<Lint> {
+ friend class InstVisitor<Lint>;
+
+ void visitFunction(Function &F);
+
+ void visitCallSite(CallSite CS);
+ void visitMemoryReference(Instruction &I, Value *Ptr,
+ uint64_t Size, unsigned Align,
+ Type *Ty, unsigned Flags);
+ void visitEHBeginCatch(IntrinsicInst *II);
+ void visitEHEndCatch(IntrinsicInst *II);
+
+ void visitCallInst(CallInst &I);
+ void visitInvokeInst(InvokeInst &I);
+ void visitReturnInst(ReturnInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitXor(BinaryOperator &I);
+ void visitSub(BinaryOperator &I);
+ void visitLShr(BinaryOperator &I);
+ void visitAShr(BinaryOperator &I);
+ void visitShl(BinaryOperator &I);
+ void visitSDiv(BinaryOperator &I);
+ void visitUDiv(BinaryOperator &I);
+ void visitSRem(BinaryOperator &I);
+ void visitURem(BinaryOperator &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitVAArgInst(VAArgInst &I);
+ void visitIndirectBrInst(IndirectBrInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitUnreachableInst(UnreachableInst &I);
+
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSetImpl<Value *> &Visited) const;
+
+ public:
+ Module *Mod;
+ const DataLayout *DL;
+ AliasAnalysis *AA;
+ AssumptionCache *AC;
+ DominatorTree *DT;
+ TargetLibraryInfo *TLI;
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ static char ID; // Pass identification, replacement for typeid
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {
+ initializeLintPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+ void print(raw_ostream &O, const Module *M) const override {}
+
+ void WriteValues(ArrayRef<const Value *> Vs) {
+ for (const Value *V : Vs) {
+ if (!V)
+ continue;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ V->printAsOperand(MessagesStr, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+ }
+
+    /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
+
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set
+ /// a breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
+ CheckFailed(Message);
+ WriteValues({V1, Vs...});
+ }
+ };
+}
+
+char Lint::ID = 0;
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+
+// Assert - We know that cond should be true; if not, print an error message.
+#define Assert(C, ...) \
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
+
+// Lint::runOnFunction - This is the main analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+ Mod = F.getParent();
+ DL = &F.getParent()->getDataLayout();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ visit(F);
+ dbgs() << MessagesStr.str();
+ Messages.clear();
+ return false;
+}
+
+void Lint::visitFunction(Function &F) {
+ // This isn't undefined behavior, it's just a little unusual, and it's a
+ // fairly common mistake to neglect to name a function.
+ Assert(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
+
+ // TODO: Check for irreducible control flow.
+}
+
+void Lint::visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ Value *Callee = CS.getCalledValue();
+
+ visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
+ MemRef::Callee);
+
+ if (Function *F = dyn_cast<Function>(findValue(Callee,
+ /*OffsetOk=*/false))) {
+ Assert(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
+
+ FunctionType *FT = F->getFunctionType();
+ unsigned NumActualArgs = CS.arg_size();
+
+ Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
+ : FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count",
+ &I);
+
+ Assert(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type",
+ &I);
+
+ // Check argument types (in case the callee was casted) and attributes.
+ // TODO: Verify that caller and callee attributes are compatible.
+ Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for (; AI != AE; ++AI) {
+ Value *Actual = *AI;
+ if (PI != PE) {
+ Argument *Formal = &*PI++;
+ Assert(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type",
+ &I);
+
+ // Check that noalias arguments don't alias other arguments. This is
+ // not fully precise because we don't know the sizes of the dereferenced
+ // memory regions.
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (AI != BI && (*BI)->getType()->isPointerTy()) {
+ AliasResult Result = AA->alias(*AI, *BI);
+ Assert(Result != MustAlias && Result != PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
+
+ // Check that an sret argument points to valid memory.
+ if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+ Type *Ty =
+ cast<PointerType>(Formal->getType())->getElementType();
+ visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
+ DL->getABITypeAlignment(Ty), Ty,
+ MemRef::Read | MemRef::Write);
+ }
+ }
+ }
+ }
+
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca",
+ &I);
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // TODO: Check more intrinsics
+
+ case Intrinsic::memcpy: {
+ MemCpyInst *MCI = cast<MemCpyInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
+ MCI->getAlignment(), nullptr, MemRef::Write);
+ visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
+ MCI->getAlignment(), nullptr, MemRef::Read);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ uint64_t Size = 0;
+ if (const ConstantInt *Len =
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+ /*OffsetOk=*/false)))
+ if (Len->getValue().isIntN(32))
+ Size = Len->getValue().getZExtValue();
+ Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
+ case Intrinsic::memmove: {
+ MemMoveInst *MMI = cast<MemMoveInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
+ MMI->getAlignment(), nullptr, MemRef::Write);
+ visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
+ MMI->getAlignment(), nullptr, MemRef::Read);
+ break;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MSI = cast<MemSetInst>(&I);
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
+ MSI->getAlignment(), nullptr, MemRef::Write);
+ break;
+ }
+
+ case Intrinsic::vastart:
+ Assert(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
+
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
+ break;
+ case Intrinsic::vacopy:
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read);
+ break;
+ case Intrinsic::vaend:
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
+ break;
+
+ case Intrinsic::stackrestore:
+ // Stackrestore doesn't read or write memory, but it sets the
+ // stack pointer, which the compiler may read from or write to
+ // at any time, so check it for both readability and writeability.
+ visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
+ break;
+ }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+ return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+ Function *F = I.getParent()->getParent();
+ Assert(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute", &I);
+
+ if (Value *V = I.getReturnValue()) {
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
+ }
+}
+
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
+void Lint::visitMemoryReference(Instruction &I,
+ Value *Ptr, uint64_t Size, unsigned Align,
+ Type *Ty, unsigned Flags) {
+ // If no memory is being referenced, it doesn't matter if the pointer
+ // is valid.
+ if (Size == 0)
+ return;
+
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
+ Assert(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
+
+ if (Flags & MemRef::Write) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+ Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
+ &I);
+ Assert(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
+ }
+ if (Flags & MemRef::Read) {
+ Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
+ &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
+ }
+ if (Flags & MemRef::Callee) {
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
+ }
+ if (Flags & MemRef::Branchee) {
+ Assert(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
+ }
+
+ // Check for buffer overflows and misalignment.
+ // Only handles memory references that read/write something simple like an
+ // alloca instruction or a global variable.
+ int64_t Offset = 0;
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
+ // OK, so the access is to a constant offset from Ptr. Check that Ptr is
+ // something we can handle and if so extract the size of this base object
+ // along with its alignment.
+ uint64_t BaseSize = MemoryLocation::UnknownSize;
+ unsigned BaseAlign = 0;
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ Type *ATy = AI->getAllocatedType();
+ if (!AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = DL->getTypeAllocSize(ATy);
+ BaseAlign = AI->getAlignment();
+ if (BaseAlign == 0 && ATy->isSized())
+ BaseAlign = DL->getABITypeAlignment(ATy);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // If the global may be defined differently in another compilation unit
+ // then don't warn about funky memory accesses.
+ if (GV->hasDefinitiveInitializer()) {
+ Type *GTy = GV->getType()->getElementType();
+ if (GTy->isSized())
+ BaseSize = DL->getTypeAllocSize(GTy);
+ BaseAlign = GV->getAlignment();
+ if (BaseAlign == 0 && GTy->isSized())
+ BaseAlign = DL->getABITypeAlignment(GTy);
+ }
+ }
+
+ // Accesses from before the start or after the end of the object are not
+ // defined.
+ Assert(Size == MemoryLocation::UnknownSize ||
+ BaseSize == MemoryLocation::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
+
+ // Accesses that say that the memory is more aligned than it is are not
+ // defined.
+ if (Align == 0 && Ty && Ty->isSized())
+ Align = DL->getABITypeAlignment(Ty);
+ Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
+ }
+}
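+
+// For example, the buffer-overflow check above fires on (illustrative):
+//   %buf = alloca i32                 ; BaseSize = 4
+//   %p = bitcast i32* %buf to i64*
+//   store i64 0, i64* %p              ; Size = 8 > BaseSize
+// producing "Undefined behavior: Buffer overflow".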
+
+void Lint::visitLoadInst(LoadInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ DL->getTypeStoreSize(I.getType()), I.getAlignment(),
+ I.getType(), MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+ visitMemoryReference(I, I.getPointerOperand(),
+ DL->getTypeStoreSize(I.getOperand(0)->getType()),
+ I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
+ AssumptionCache *AC) {
+ // Assume undef could be zero.
+ if (isa<UndefValue>(V))
+ return true;
+
+ VectorType *VecTy = dyn_cast<VectorType>(V->getType());
+ if (!VecTy) {
+ unsigned BitWidth = V->getType()->getIntegerBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
+ dyn_cast<Instruction>(V), DT);
+ return KnownZero.isAllOnesValue();
+ }
+
+ // Per-component check doesn't work with zeroinitializer
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (C->isZeroValue())
+ return true;
+
+ // For a vector, KnownZero will only be true if all values are zero, so check
+ // this per component
+ unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
+ for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
+ Constant *Elem = C->getAggregateElement(I);
+ if (isa<UndefValue>(Elem))
+ return true;
+
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(Elem, KnownZero, KnownOne, DL);
+ if (KnownZero.isAllOnesValue())
+ return true;
+ }
+
+ return false;
+}
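+
+// Because the check is per element, a vector divisor such as (illustrative):
+//   %q = udiv <2 x i32> %x, <i32 4, i32 0>
+// is flagged: element 1 of the divisor is zero even though the divisor as a
+// whole is not the zero vector.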
+
+void Lint::visitSDiv(BinaryOperator &I) {
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+ if (isa<ConstantInt>(I.getArraySize()))
+ // This isn't undefined behavior, it's just an obvious pessimization.
+ Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
+
+ // TODO: Check for an unusual size (MSB set?)
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+ visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+ visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, 0,
+ nullptr, MemRef::Branchee);
+
+ Assert(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+ // This isn't undefined behavior, it's merely suspicious.
+ Assert(&I == &I.getParent()->front() ||
+ std::prev(I.getIterator())->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects",
+ &I);
+}
+
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+ SmallPtrSet<Value *, 4> Visited;
+ return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSetImpl<Value *> &Visited) const {
+ // Detect self-referential values.
+ if (!Visited.insert(V).second)
+ return UndefValue::get(V->getType());
+
+ // TODO: Look through sext or zext cast, when the result is known to
+ // be interpreted as signed or unsigned, respectively.
+ // TODO: Look through eliminable cast pairs.
+ // TODO: Look through calls with unique return values.
+ // TODO: Look through vector insert/extract/shuffle.
+ V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
+ if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+ BasicBlock::iterator BBI = L->getIterator();
+ BasicBlock *BB = L->getParent();
+ SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ for (;;) {
+ if (!VisitedBlocks.insert(BB).second)
+ break;
+ if (Value *U =
+ FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, DefMaxInstsToScan, AA))
+ return findValueImpl(U, OffsetOk, Visited);
+ if (BBI != BB->begin()) break;
+ BB = BB->getUniquePredecessor();
+ if (!BB) break;
+ BBI = BB->end();
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (Value *W = PN->hasConstantValue())
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->isNoopCast(*DL))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+ if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+ Ex->getIndices()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // Same as above, but for ConstantExpr instead of Instruction.
+ if (Instruction::isCast(CE->getOpcode())) {
+ if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+ CE->getOperand(0)->getType(), CE->getType(),
+ DL->getIntPtrType(V->getType())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ } else if (CE->getOpcode() == Instruction::ExtractValue) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+ }
+
+ // As a last resort, try SimplifyInstruction or constant folding.
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+ return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot lint external functions");
+
+ legacy::FunctionPassManager FPM(F.getParent());
+ Lint *V = new Lint();
+ FPM.add(V);
+ FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+///
+void llvm::lintModule(const Module &M) {
+ legacy::PassManager PM;
+ Lint *V = new Lint();
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+}
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
new file mode 100644
index 0000000..4b2fa3c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -0,0 +1,287 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+/// \brief Test if A and B will obviously have the same value.
+///
+/// This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// \code
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+/// \endcode
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B)
+ return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// \brief Check if executing a load of this pointer value cannot trap.
+///
+/// If it is not obviously safe to load from the specified pointer, we do
+/// a quick local scan of the basic block containing \c ScanFrom, to determine
+/// if the address is already accessed.
+///
+/// This uses the pointee type to determine how many bytes need to be safe to
+/// load from the pointer.
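+///
+/// For example (illustrative), a load of the second field of an alloca:
+///   %a = alloca { i32, i32 }
+///   %p = getelementptr { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
+///   %v = load i32, i32* %p
+/// is provably safe: ByteOffset (4) + LoadSize (4) fits in the 8-byte
+/// allocation, assuming suitable alignment.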
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+ unsigned Align) {
+ const DataLayout &DL = ScanFrom->getModule()->getDataLayout();
+
+ // Zero alignment means that the load has the ABI alignment for the target
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(V->getType()->getPointerElementType());
+ assert(isPowerOf2_32(Align));
+
+ int64_t ByteOffset = 0;
+ Value *Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
+
+ if (ByteOffset < 0) // out of bounds
+ return false;
+
+ Type *BaseType = nullptr;
+ unsigned BaseAlign = 0;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ // An alloca is safe to load from as long as it is suitably aligned.
+ BaseType = AI->getAllocatedType();
+ BaseAlign = AI->getAlignment();
+ } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // Global variables are not necessarily safe to load from if they can be
+ // overridden. Their size may change, or they may be weak and require a
+ // test to determine if they were in fact provided.
+ if (!GV->mayBeOverridden()) {
+ BaseType = GV->getType()->getElementType();
+ BaseAlign = GV->getAlignment();
+ }
+ }
+
+ PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
+
+ // If we found a base allocated type from either an alloca or global variable,
+ // try to see if we are definitively within the allocated region. We need to
+ // know the size of the base type and the loaded type to do anything in this
+ // case.
+ if (BaseType && BaseType->isSized()) {
+ if (BaseAlign == 0)
+ BaseAlign = DL.getPrefTypeAlignment(BaseType);
+
+ if (Align <= BaseAlign) {
+ // Check if the load is within the bounds of the underlying object.
+ if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
+ ((ByteOffset % Align) == 0))
+ return true;
+ }
+ }
+
+ // Otherwise, be a little bit aggressive by scanning the local block where we
+ // want to check to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom->getIterator(),
+ E = ScanFrom->getParent()->begin();
+
+ // We can at least always strip pointer casts even though we can't use the
+ // base here.
+ V = V->stripPointerCasts();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free), the pointer could be marked invalid.
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
+ return false;
+
+ Value *AccessedPtr;
+ unsigned AccessedAlign;
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ AccessedPtr = LI->getPointerOperand();
+ AccessedAlign = LI->getAlignment();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ AccessedPtr = SI->getPointerOperand();
+ AccessedAlign = SI->getAlignment();
+ } else
+ continue;
+
+ Type *AccessedTy = AccessedPtr->getType()->getPointerElementType();
+ if (AccessedAlign == 0)
+ AccessedAlign = DL.getABITypeAlignment(AccessedTy);
+ if (AccessedAlign < Align)
+ continue;
+
+ // Handle trivial cases.
+ if (AccessedPtr == V)
+ return true;
+
+ if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
+ LoadSize <= DL.getTypeStoreSize(AccessedTy))
+ return true;
+ }
+ return false;
+}
+
+/// DefMaxInstsToScan - the default maximum number of instructions
+/// to scan in the block, used by FindAvailableLoadedValue().
+/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
+/// threading in part by eliminating partially redundant loads.
+/// At that point, the value of MaxInstsToScan was already set to '6'
+/// without documented explanation.
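+/// The limit can be overridden from the opt command line, e.g.
+/// "opt -available-load-scan-limit=12 ..." (illustrative invocation).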
+cl::opt<unsigned>
+llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
+ cl::desc("Use this to specify the default maximum number of instructions "
+ "to scan backward from a given instruction, when searching for "
+ "available loaded value"));
+
+/// \brief Scan the ScanBB block backwards to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+///
+/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
+/// instructions to scan in the block. If it is set to \c 0, it will scan the whole
+/// block.
+///
+/// If the value is available, this function returns it. If not, it returns the
+/// iterator for the last validated instruction that the value would be live
+/// through. If we scanned the entire block and didn't find something that
+/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last
+/// instruction processed and this returns null.
+///
+/// You can also optionally specify an alias analysis implementation, which
+/// makes this more precise.
+///
+/// If \c AATags is non-null and a load or store is found, the AA tags from the
+/// load or store are recorded there. If there are no AA tags or if no access is
+/// found, it is left unmodified.
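+///
+/// For example (illustrative), scanning backward from the load in
+///   store i32 42, i32* %p
+///   %v = load i32, i32* %p
+/// returns the stored operand (i32 42) instead of emitting a new load.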
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA, AAMDNodes *AATags) {
+ if (MaxInstsToScan == 0)
+ MaxInstsToScan = ~0U;
+
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+
+ const DataLayout &DL = ScanBB->getModule()->getDataLayout();
+
+ // Try to get the store size for the type.
+ uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
+
+ Value *StrippedPtr = Ptr->stripPointerCasts();
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = &*--ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ // Restore ScanFrom to expected value in case next test succeeds
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0)
+ return nullptr;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ // (This is true even if the load is volatile or atomic, although
+ // those cases are unlikely.)
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(
+ LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+ CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
+ if (AATags)
+ LI->getAAMetadata(*AATags);
+ return LI;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
+ // If this is a store through Ptr, the value is available!
+ // (This is true even if the store is volatile or atomic, although
+ // those cases are unlikely.)
+ if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
+ CastInst::isBitOrNoopPointerCastable(SI->getValueOperand()->getType(),
+ AccessTy, DL)) {
+ if (AATags)
+ SI->getAAMetadata(*AATags);
+ return SI->getOperand(0);
+ }
+
+ // If both StrippedPtr and StorePtr reach all the way to an alloca or
+ // global and they are different, ignore the store. This is a trivial form
+ // of alias analysis that is important for reg2mem'd code.
+ if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
+ (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)) &&
+ StrippedPtr != StorePtr)
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
+ continue;
+
+ // Otherwise the store may or may not alias the pointer; bail out.
+ ++ScanFrom;
+ return nullptr;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return nullptr;
+ }
+ }
+
+ // Got to the start of the block, we didn't find it, but are done for this
+ // block.
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
new file mode 100644
index 0000000..d7896ad
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -0,0 +1,1861 @@
+//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the loop memory dependence analysis that was
+// originally developed for the loop vectorizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/VectorUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-accesses"
+
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave;
+
+static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
+ "runtime-memory-check-threshold", cl::Hidden,
+ cl::desc("When performing memory disambiguation checks at runtime do not "
+ "generate more than this number of comparisons (default = 8)."),
+ cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
+unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+
+/// \brief The maximum number of comparisons allowed while merging memory checks
+static cl::opt<unsigned> MemoryCheckMergeThreshold(
+ "memory-check-merge-threshold", cl::Hidden,
+ cl::desc("Maximum number of comparisons done when trying to merge "
+ "runtime memory checks. (default = 100)"),
+ cl::init(100));
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+ MaxDependences("max-dependences", cl::Hidden,
+ cl::desc("Maximum number of dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
+
+bool VectorizerParams::isInterleaveForced() {
+ return ::VectorizationInterleave.getNumOccurrences() > 0;
+}
+
+void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
+ const Function *TheFunction,
+ const Loop *TheLoop,
+ const char *PassName) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (const Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
+ *TheFunction, DL, Message.str());
+}
+
+Value *llvm::stripIntegerCast(Value *V) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if (CI->getOperand(0)->getType()->isIntegerTy())
+ return CI->getOperand(0);
+ return V;
+}
+
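+// Illustrative example: if Ptr has the SCEV {%p,+,(4 * %stride)} and %stride
+// is mapped in PtrToStride, the function adds the predicate "%stride == 1"
+// to PSE and returns the SCEV rewritten under it, i.e. {%p,+,4}.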
+const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
+ const ValueToValueMap &PtrToStride,
+ Value *Ptr, Value *OrigPtr) {
+ const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
+
+ // If there is an entry in the map return the SCEV of the pointer with the
+ // symbolic stride replaced by one.
+ ValueToValueMap::const_iterator SI =
+ PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ if (SI != PtrToStride.end()) {
+ Value *StrideVal = SI->second;
+
+ // Strip casts.
+ StrideVal = stripIntegerCast(StrideVal);
+
+ // Replace symbolic stride by one.
+ Value *One = ConstantInt::get(StrideVal->getType(), 1);
+ ValueToValueMap RewriteMap;
+ RewriteMap[StrideVal] = One;
+
+ ScalarEvolution *SE = PSE.getSE();
+ const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+ const auto *CT =
+ static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+ PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+ auto *Expr = PSE.getSCEV(Ptr);
+
+ DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
+ << "\n");
+ return Expr;
+ }
+
+ // Otherwise, just return the SCEV of the original pointer.
+ return OrigSCEV;
+}
+
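+// Illustrative example: for a (hypothetical) affine access {%base,+,4} in a
+// loop whose backedge-taken count is 99, the checked interval is
+// [ScStart, ScEnd] = [%base, %base + 396]; a negative step swaps the bounds.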
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+ unsigned DepSetId, unsigned ASId,
+ const ValueToValueMap &Strides,
+ PredicatedScalarEvolution &PSE) {
+ // Get the stride replaced scev.
+ const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ ScalarEvolution *SE = PSE.getSE();
+ const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+ const SCEV *ScStart = AR->getStart();
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // For expressions with negative step, the upper bound is ScStart and the
+ // lower bound is ScEnd.
+ if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+ if (CStep->getValue()->isNegative())
+ std::swap(ScStart, ScEnd);
+ } else {
+ // Fallback case: the step is not constant, but we can still
+ // get the upper and lower bounds of the interval by using min/max
+ // expressions.
+ ScStart = SE->getUMinExpr(ScStart, ScEnd);
+ ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+ }
+
+ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+}
+
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+ SmallVector<PointerCheck, 4> Checks;
+
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+ if (needsChecking(CGI, CGJ))
+ Checks.push_back(std::make_pair(&CGI, &CGJ));
+ }
+ }
+ return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ assert(Checks.empty() && "Checks is not empty");
+ groupChecks(DepCands, UseDependencies);
+ Checks = generateChecks();
+}
+
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N) const {
+ for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
+ for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
+ if (needsChecking(M.Members[I], N.Members[J]))
+ return true;
+ return false;
+}
+
+/// Compare \p I and \p J and return the minimum.
+/// Return nullptr in case we couldn't find an answer.
+static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
+ ScalarEvolution *SE) {
+ const SCEV *Diff = SE->getMinusSCEV(J, I);
+ const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
+
+ if (!C)
+ return nullptr;
+ if (C->getValue()->isNegative())
+ return J;
+ return I;
+}
+
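+// Illustrative example: a pointer whose range is [%b + 8, %b + 24] can be
+// added to a group whose current bounds are [%b, %b + 16], because both the
+// start and the end differ from the group's Low/High by constants; the
+// group then widens to [%b, %b + 24].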
+bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
+ const SCEV *Start = RtCheck.Pointers[Index].Start;
+ const SCEV *End = RtCheck.Pointers[Index].End;
+
+ // Compare the starts and ends with the known minimum and maximum
+ // of this set. We need to know how we compare against the min/max
+ // of the set in order to be able to emit memchecks.
+ const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
+ if (!Min0)
+ return false;
+
+ const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
+ if (!Min1)
+ return false;
+
+ // Update the low bound expression if we've found a new min value.
+ if (Min0 == Start)
+ Low = Start;
+
+ // Update the high bound expression if we've found a new max value.
+ if (Min1 != End)
+ High = End;
+
+ Members.push_back(Index);
+ return true;
+}
+
+void RuntimePointerChecking::groupChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ // We build the groups from dependency candidates equivalence classes
+ // because:
+ // - We know that pointers in the same equivalence class share
+ // the same underlying object and therefore there is a chance
+ // that we can compare pointers
+ // - We wouldn't be able to merge two pointers for which we need
+ // to emit a memcheck. The classes in DepCands are already
+ // conveniently built such that no two pointers in the same
+ // class need checking against each other.
+
+ // We use the following (greedy) algorithm to construct the groups
+ // For every pointer in the equivalence class:
+ // For each existing group:
+ // - if the difference between this pointer and the min/max bounds
+ // of the group is a constant, then make the pointer part of the
+ // group and update the min/max bounds of that group as required.
+
+ CheckingGroups.clear();
+
+ // If we need to check two pointers to the same underlying object
+ // with a non-constant difference, we shouldn't perform any pointer
+ // grouping with those pointers. This is because we can easily get
+ // into cases where the resulting check would return false, even when
+ // the accesses are safe.
+ //
+ // The following example shows this:
+ // for (i = 0; i < 1000; ++i)
+ // a[5000 + i * m] = a[i] + a[i + 9000]
+ //
+ // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+ // (0, 10000) which is always false. However, if m is 1, there is no
+ // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+ // us to perform an accurate check in this case.
+ //
+ // The above case requires that we have an UnknownDependence between
+ // accesses to the same underlying object. This cannot happen unless
+ // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // is also false. In this case we will use the fallback path and create
+ // separate checking groups for all pointers.
+
+ // If we don't have the dependency partitions, construct a new
+ // checking pointer group for each pointer. This is also required
+ // for correctness, because in this case we can have checking between
+ // pointers to the same underlying object.
+ if (!UseDependencies) {
+ for (unsigned I = 0; I < Pointers.size(); ++I)
+ CheckingGroups.push_back(CheckingPtrGroup(I, *this));
+ return;
+ }
+
+ unsigned TotalComparisons = 0;
+
+ DenseMap<Value *, unsigned> PositionMap;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index)
+ PositionMap[Pointers[Index].PointerValue] = Index;
+
+ // We need to keep track of what pointers we've already seen so we
+ // don't process them twice.
+ SmallSet<unsigned, 2> Seen;
+
+ // Go through all equivalence classes, get the "pointer check groups"
+ // and add them to the overall solution. We use the order in which accesses
+ // appear in 'Pointers' to enforce determinism.
+ for (unsigned I = 0; I < Pointers.size(); ++I) {
+ // We've seen this pointer before, and therefore already processed
+ // its equivalence class.
+ if (Seen.count(I))
+ continue;
+
+ MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
+ Pointers[I].IsWritePtr);
+
+ SmallVector<CheckingPtrGroup, 2> Groups;
+ auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
+
+ // Because DepCands is constructed by visiting accesses in the order in
+ // which they appear in alias sets (which is deterministic) and the
+ // iteration order within an equivalence class member is only dependent on
+ // the order in which unions and insertions are performed on the
+ // equivalence class, the iteration order is deterministic.
+ for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
+ MI != ME; ++MI) {
+ unsigned Pointer = PositionMap[MI->getPointer()];
+ bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (CheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer)) {
+ Merged = true;
+ break;
+ }
+ }
+
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(CheckingPtrGroup(Pointer, *this));
+ }
+
+ // We've computed the grouped checks for this partition.
+ // Save the results and continue with the next one.
+ std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ }
+}
+
+bool RuntimePointerChecking::arePointersInSamePartition(
+ const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
+ unsigned PtrIdx2) {
+ return (PtrToPartition[PtrIdx1] != -1 &&
+ PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
+}
+
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
+ const PointerInfo &PointerI = Pointers[I];
+ const PointerInfo &PointerJ = Pointers[J];
+
+ // No need to check if two readonly pointers intersect.
+ if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
+ return false;
+
+ // Only need to check pointers between two different dependency sets.
+ if (PointerI.DependencySetId == PointerJ.DependencySetId)
+ return false;
+
+ // Only need to check pointers in the same alias set.
+ if (PointerI.AliasSetId != PointerJ.AliasSetId)
+ return false;
+
+ return true;
+}
+
+void RuntimePointerChecking::printChecks(
+ raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+ unsigned Depth) const {
+ unsigned N = 0;
+ for (const auto &Check : Checks) {
+ const auto &First = Check.first->Members, &Second = Check.second->Members;
+
+ OS.indent(Depth) << "Check " << N++ << ":\n";
+
+ OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
+ for (unsigned K = 0; K < First.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
+
+ OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
+ for (unsigned K = 0; K < Second.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
+ }
+}
+
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
+
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ printChecks(OS, Checks, Depth);
+
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ const auto &CG = CheckingGroups[I];
+
+ OS.indent(Depth + 2) << "Group " << &CG << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
+ << ")\n";
+ for (unsigned J = 0; J < CG.Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
+ << "\n";
+ }
+ }
+}
+
+namespace {
+/// \brief Analyzes memory accesses in a loop.
+///
+/// Checks whether run time pointer checks are needed and builds sets for data
+/// dependence checking.
+class AccessAnalysis {
+public:
+ /// \brief Read or write access location.
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
+ MemoryDepChecker::DepCandidates &DA,
+ PredicatedScalarEvolution &PSE)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
+ PSE(PSE) {}
+
+ /// \brief Register a load and whether it is only read from.
+ void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, false));
+ if (IsReadOnly)
+ ReadOnlyPtr.insert(Ptr);
+ }
+
+ /// \brief Register a store.
+ void addStore(MemoryLocation &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, true));
+ }
+
+ /// \brief Check whether we can check the pointers at runtime for
+ /// non-intersection.
+ ///
+ /// Returns true if we need no check or if we do and we can generate them
+ /// (i.e. the pointers have computable bounds).
+ bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
+ bool ShouldCheckStride = false);
+
+ /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// and builds sets of dependent accesses.
+ void buildDependenceSets() {
+ processMemAccesses();
+ }
+
+ /// \brief Initial processing of memory accesses determined that we need to
+ /// perform dependency checking.
+ ///
+ /// Note that this can later be cleared if we retry memcheck analysis without
+ /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
+ bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+
+ /// We decided that no dependence analysis would be used. Reset the state.
+ void resetDepChecks(MemoryDepChecker &DepChecker) {
+ CheckDeps.clear();
+ DepChecker.clearDependences();
+ }
+
+ MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+
+private:
+ typedef SetVector<MemAccessInfo> PtrAccessSet;
+
+ /// \brief Go over all memory access and check whether runtime pointer checks
+ /// are needed and build sets of dependency check candidates.
+ void processMemAccesses();
+
+ /// Set of all accesses.
+ PtrAccessSet Accesses;
+
+ const DataLayout &DL;
+
+ /// Set of accesses that need a further dependence check.
+ MemAccessInfoSet CheckDeps;
+
+ /// Set of pointers that are read only.
+ SmallPtrSet<Value*, 16> ReadOnlyPtr;
+
+ /// An alias set tracker to partition the access set by underlying object
+ /// and intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
+
+ LoopInfo *LI;
+
+ /// Sets of potentially dependent accesses - members of one set share an
+ /// underlying pointer. The set "CheckDeps" identifies which sets really need a
+ /// dependence check.
+ MemoryDepChecker::DepCandidates &DepCands;
+
+ /// \brief Initial processing of memory accesses determined that we may need
+ /// to add memchecks. Perform the analysis to determine the necessary checks.
+ ///
+ /// Note that this is different from isDependencyCheckNeeded. When we retry
+ /// memcheck analysis without dependency checking
+ /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
+ /// while this remains set if we have potentially dependent accesses.
+ bool IsRTCheckAnalysisNeeded;
+
+ /// The SCEV predicate containing all the SCEV-related assumptions.
+ PredicatedScalarEvolution &PSE;
+};
+
+} // end anonymous namespace
+
+/// \brief Check whether a pointer can participate in a runtime bounds check.
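+///
+/// A pointer qualifies when its (stride-replaced) SCEV is an affine AddRec
+/// in the loop, e.g. (illustrative) {%base,+,4}, so that the start and end
+/// of the accessed range can be computed.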
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
+ const ValueToValueMap &Strides, Value *Ptr,
+ Loop *L) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
+}
+
+bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
+ ScalarEvolution *SE, Loop *TheLoop,
+ const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = true;
+
+ bool NeedRTCheck = false;
+ if (!IsRTCheckAnalysisNeeded) return true;
+
+ bool IsDepCheckNeeded = isDependencyCheckNeeded();
+
+ // We assign a consecutive id to accesses from different alias sets.
+ // Accesses in different groups don't need to be checked against each other.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ int NumReadPtrChecks = 0;
+ int NumWritePtrChecks = 0;
+
+ // We assign consecutive ids to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+
+ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ CanDoRT = false;
+ }
+ }
+
+ // If we have at least two writes or one write and a read then we need to
+ // check them. But there is no need for checks if there is only one
+ // dependence set for this alias set.
+ //
+ // Note that this function computes CanDoRT and NeedRTCheck independently.
+ // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
+ // for which we couldn't find the bounds but we don't actually need to emit
+ // any checks so it does not matter.
+ if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
+ NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
+ NumWritePtrChecks >= 1));
+
+ ++ASId;
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.Pointers[i].DependencySetId ==
+ RtCheck.Pointers[j].DependencySetId)
+ continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i].PointerValue;
+ Value *PtrJ = RtCheck.Pointers[j].PointerValue;
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
+ if (NeedRTCheck && CanDoRT)
+ RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
+
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
+ << " pointer comparisons.\n");
+
+ RtCheck.Need = NeedRTCheck;
+
+ bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
+ if (!CanDoRTIfNeeded)
+ RtCheck.reset();
+ return CanDoRTIfNeeded;
+}
+
+void AccessAnalysis::processMemAccesses() {
+ // We process the set twice: first we process read-write pointers, last we
+ // process read-only pointers. This allows us to skip dependence tests for
+ // read-only pointers.
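+ //
+ // E.g. (illustrative): in a loop computing a[i] = b[i] + c[i], the
+ // read-only pointers b and c only need dependence tests against the
+ // write to a, not against each other.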
+
+ DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+ // Note that both the alias-set tracker and the alias sets themselves used
+ // linked lists internally and so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of access to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto AV : AS) {
+ Value *Ptr = AV.getValue();
+
+ // For a single memory access in AliasSetTracker, Accesses may contain
+ // both read and write, and they both need to be handled for CheckDeps.
+ for (auto AC : S) {
+ if (AC.getPointer() != Ptr)
+ continue;
+
+ bool IsWrite = AC.getInt();
+
+ // If we're using the deferred access set, then it contains only
+ // reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a
+ // read or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in
+ // the first round (they need to be checked after we have seen all
+ // write pointers). Note: we also mark pointers that are not
+ // consecutive as "read-only" pointers (so that we check
+ // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
+
+ // If this is a write - check other reads and writes for conflicts. If
+ // this is a read, only check other writes for conflicts (but only if
+ // there is no other write to the ptr - this is an optimization to
+ // catch "a[i] = a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckAnalysisNeeded = true;
+ }
+
+ if (IsWrite)
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector TempObjects;
+
+ GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
+ for (Value *UnderlyingObj : TempObjects) {
+ // nullptr never aliases anything; don't join sets for pointers that
+ // have "null" in their UnderlyingObjects list.
+ if (isa<ConstantPointerNull>(UnderlyingObj))
+ continue;
+
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
+ }
+ }
+ }
+ }
+ }
+}
+
+static bool isInBoundsGep(Value *Ptr) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ return GEP->isInBounds();
+ return false;
+}
+
+/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
+/// i.e. monotonically increasing/decreasing.
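+///
+/// For example (illustrative), for a "getelementptr inbounds" whose single
+/// non-constant index is an add nsw of an NSW AddRec and a constant, the
+/// non-wrapping of the index recurrence carries over to the pointer.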
+static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
+ ScalarEvolution *SE, const Loop *L) {
+ // FIXME: This should probably only return true for NUW.
+ if (AR->getNoWrapFlags(SCEV::NoWrapMask))
+ return true;
+
+ // Scalar evolution does not propagate the non-wrapping flags to values that
+ // are derived from a non-wrapping induction variable because non-wrapping
+ // could be flow-sensitive.
+ //
+ // Look through the potentially overflowing instruction to try to prove
+ // non-wrapping for the *specific* value of Ptr.
+
+ // The arithmetic implied by an inbounds GEP can't overflow.
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || !GEP->isInBounds())
+ return false;
+
+ // Make sure there is only one non-const index and analyze that.
+ Value *NonConstIndex = nullptr;
+ for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+ if (!isa<ConstantInt>(*Index)) {
+ if (NonConstIndex)
+ return false;
+ NonConstIndex = *Index;
+ }
+ if (!NonConstIndex)
+ // The recurrence is on the pointer, ignore for now.
+ return false;
+
+ // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
+ // AddRec using a NSW operation.
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
+ if (OBO->hasNoSignedWrap() &&
+ // Assume the other operand is constant so that the AddRec can be
+ // easily found.
+ isa<ConstantInt>(OBO->getOperand(1))) {
+ auto *OpScev = SE->getSCEV(OBO->getOperand(0));
+
+ if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
+ return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
+ }
+
+ return false;
+}
+
+/// \brief Check whether the access through \p Ptr has a constant stride.
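+///
+/// For example (illustrative), an i32 access A[2*i] has the SCEV
+/// {%A,+,8}; with a 4-byte element size this yields a stride of 2,
+/// provided the recurrence is known not to wrap. A non-affine access
+/// such as A[B[i]] returns 0.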
+int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
+ const Loop *Lp, const ValueToValueMap &StridesMap) {
+ Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+ // Make sure that the pointer does not point to aggregate types.
+ auto *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type "
+ << *Ptr << "\n");
+ return 0;
+ }
+
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The access function must stride over the innermost loop; a stride over
+ // an enclosing loop is meaningless here, so bail out.
+ if (Lp != AR->getLoop()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+ *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ // An inbounds getelementptr that is an AddRec with a unit stride
+ // cannot wrap per definition. The unit stride requirement is checked later.
+ // A getelementptr without an inbounds attribute and unit stride would have
+ // to access the pointer value "0" which is undefined behavior in address
+ // space 0, therefore we can also vectorize this case.
+ bool IsInBoundsGEP = isInBoundsGep(Ptr);
+ bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // Check the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
+
+ // Calculate the pointer stride and check if it is constant.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+ " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ const APInt &APStepVal = C->getAPInt();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return 0;
+
+ // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
+ return 0;
+
+ return Stride;
+}
+
+bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case BackwardVectorizable:
+ return true;
+
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return false;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isBackward() const {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case ForwardButPreventsForwarding:
+ case Unknown:
+ return false;
+
+ case BackwardVectorizable:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ return isBackward() || Type == Unknown;
+}
+
+bool MemoryDepChecker::Dependence::isForward() const {
+ switch (Type) {
+ case Forward:
+ case ForwardButPreventsForwarding:
+ return true;
+
+ case NoDep:
+ case Unknown:
+ case BackwardVectorizable:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return false;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+ unsigned TypeByteSize) {
+ // If loads occur at a distance that is not a multiple of a feasible vector
+ // factor store-load forwarding does not take place.
+ // Positive dependences might cause troubles because vectorizing them might
+ // prevent store-load forwarding making vectorized code run a lot slower.
+ // a[i] = a[i-3] ^ a[i-8];
+ // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
+ // hence on your typical architecture store-load forwarding does not take
+ // place. Vectorizing in such cases does not make sense.
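+ // E.g. (illustrative) for i32 accesses (TypeByteSize = 4) at Distance 12:
+ // the smallest candidate VF is 8 bytes, 12 is not a multiple of 8, and
+ // 12 / 8 iterations fall well inside the forwarding window, so the safe
+ // VF drops below two elements and a conflict is reported.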
+ // Store-load forwarding distance.
+ const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
+ // Maximum vector factor.
+ unsigned MaxVFWithoutSLForwardIssues =
+ VectorizerParams::MaxVectorWidth * TypeByteSize;
+ if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+ for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+ vf *= 2) {
+ if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+ MaxVFWithoutSLForwardIssues = (vf >>= 1);
+ break;
+ }
+ }
+
+ if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+ DEBUG(dbgs() << "LAA: Distance " << Distance <<
+ " that could cause a store-load forwarding conflict\n");
+ return true;
+ }
+
+ if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ MaxVFWithoutSLForwardIssues !=
+ VectorizerParams::MaxVectorWidth * TypeByteSize)
+ MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ return false;
+}
+
+/// \brief Check the dependence for two accesses with the same stride \p Stride.
+/// \p Distance is the positive distance and \p TypeByteSize is the type size
+/// in bytes.
+///
+/// \returns true if they are independent.
+static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
+ unsigned TypeByteSize) {
+ assert(Stride > 1 && "The stride must be greater than 1");
+ assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
+ assert(Distance > 0 && "The distance must be non-zero");
+
+ // Skip if the distance is not a multiple of the type byte size.
+ if (Distance % TypeByteSize)
+ return false;
+
+ unsigned ScaledDist = Distance / TypeByteSize;
+
+ // No dependence if the scaled distance is not a multiple of the stride.
+ // E.g.
+ // for (i = 0; i < 1024 ; i += 4)
+ // A[i+2] = A[i] + 1;
+ //
+ // Two accesses in memory (scaled distance is 2, stride is 4):
+ // | A[0] | | | | A[4] | | | |
+ // | | | A[2] | | | | A[6] | |
+ //
+ // E.g.
+ // for (i = 0; i < 1024 ; i += 3)
+ // A[i+4] = A[i] + 1;
+ //
+ // Two accesses in memory (scaled distance is 4, stride is 3):
+ // | A[0] | | | A[3] | | | A[6] | | |
+ // | | | | | A[4] | | | A[7] | |
+ return ScaledDist % Stride;
+}
+
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides) {
+ assert (AIdx < BIdx && "Must pass arguments in program order");
+
+ Value *APtr = A.getPointer();
+ Value *BPtr = B.getPointer();
+ bool AIsWrite = A.getInt();
+ bool BIsWrite = B.getInt();
+
+ // Two reads are independent.
+ if (!AIsWrite && !BIsWrite)
+ return Dependence::NoDep;
+
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return Dependence::Unknown;
+
+ const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr);
+
+ int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides);
+
+ const SCEV *Src = AScev;
+ const SCEV *Sink = BScev;
+
+ // If the induction step is negative we have to invert source and sink of the
+ // dependence.
+ if (StrideAPtr < 0) {
+ //Src = BScev;
+ //Sink = AScev;
+ std::swap(APtr, BPtr);
+ std::swap(Src, Sink);
+ std::swap(AIsWrite, BIsWrite);
+ std::swap(AIdx, BIdx);
+ std::swap(StrideAPtr, StrideBPtr);
+ }
+
+ const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
+
+ DEBUG(dbgs() << "LAA: Src Scev: " << *Src << " Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
+
+ // Need accesses with constant stride. We don't want to vectorize
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
+ // the address space.
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+ DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+ return Dependence::Unknown;
+ }
+
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
+ return Dependence::Unknown;
+ }
+
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
+
+ // Negative distances are not plausible dependencies.
+ const APInt &Val = C->getAPInt();
+ if (Val.isNegative()) {
+ bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ if (IsTrueDataDependence &&
+ (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
+ ATy != BTy))
+ return Dependence::ForwardButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
+ return Dependence::Forward;
+ }
+
+ // Write to the same location with the same size.
+ // Could be improved to assert type sizes are the same (i32 == float, etc).
+ if (Val == 0) {
+ if (ATy == BTy)
+ return Dependence::Forward;
+ DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ return Dependence::Unknown;
+ }
+
+ assert(Val.isStrictlyPositive() && "Expect a positive value");
+
+ if (ATy != BTy) {
+ DEBUG(dbgs() <<
+ "LAA: ReadWrite-Write positive dependency with different types\n");
+ return Dependence::Unknown;
+ }
+
+ unsigned Distance = (unsigned) Val.getZExtValue();
+
+ unsigned Stride = std::abs(StrideAPtr);
+ if (Stride > 1 &&
+ areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) {
+ DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
+ return Dependence::NoDep;
+ }
+
+ // Bail out early if passed-in parameters make vectorization not feasible.
+ unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
+ VectorizerParams::VectorizationFactor : 1);
+ unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
+ VectorizerParams::VectorizationInterleave : 1);
+ // The minimum number of iterations for a vectorized/unrolled version.
+ unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
+
+ // It's not vectorizable if the distance is smaller than the minimum distance
+ // needed for a vectorized/unrolled version. Vectorizing one iteration in
+ // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
+ // TypeByteSize (no need to add the last gap distance).
+ //
+ // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
+ // foo(int *A) {
+ // int *B = (int *)((char *)A + 14);
+ // for (i = 0 ; i < 1024 ; i += 2)
+ // B[i] = A[i] + 1;
+ // }
+ //
+ // Two accesses in memory (stride is 2):
+ // | A[0] | | A[2] | | A[4] | | A[6] | |
+ // | B[0] | | B[2] | | B[4] |
+ //
+ // Distance needed to vectorize iterations except the last one:
+ // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
+ // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
+ //
+ // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
+ // 12, which is less than the distance of 14.
+ //
+ // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
+ // the minimum distance needed is 28, which is greater than distance. It is
+ // not safe to do vectorization.
+ unsigned MinDistanceNeeded =
+ TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
+ if (MinDistanceNeeded > Distance) {
+ DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
+ << '\n');
+ return Dependence::Backward;
+ }
+
+ // Unsafe if the minimum distance needed is greater than max safe distance.
+ if (MinDistanceNeeded > MaxSafeDepDistBytes) {
+ DEBUG(dbgs() << "LAA: Failure because it needs at least "
+ << MinDistanceNeeded << " size in bytes");
+ return Dependence::Backward;
+ }
+
+ // Positive distance bigger than max vectorization factor.
+ // FIXME: Should use max factor instead of max distance in bytes, which
+ // cannot handle different types.
+ // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
+ // void foo (int *A, char *B) {
+ // for (unsigned i = 0; i < 1024; i++) {
+ // A[i+2] = A[i] + 1;
+ // B[i+2] = B[i] + 1;
+ // }
+ // }
+ //
+ // This case is currently unsafe according to the max safe distance. If we
+ // analyze the two accesses on array B, the max safe dependence distance
+ // is 2. Then we analyze the accesses on array A, where the minimum distance
+ // needed is 8, which is greater than 2, so vectorization is forbidden. But
+ // actually both A and B could be vectorized with a factor of 2.
+ MaxSafeDepDistBytes =
+ Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
+
+ bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ if (IsTrueDataDependence &&
+ couldPreventStoreLoadForward(Distance, TypeByteSize))
+ return Dependence::BackwardVectorizableButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
+ << " with max VF = "
+ << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');
+
+ return Dependence::BackwardVectorizable;
+}
+
+bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps,
+ const ValueToValueMap &Strides) {
+
+ MaxSafeDepDistBytes = -1U;
+ while (!CheckDeps.empty()) {
+ MemAccessInfo CurAccess = *CheckDeps.begin();
+
+ // Get the relevant memory access set.
+ EquivalenceClasses<MemAccessInfo>::iterator I =
+ AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+ // Check accesses within this set.
+ EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+ AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+ // Check every access pair.
+ while (AI != AE) {
+ CheckDeps.erase(*AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ while (OI != AE) {
+ // Check every accessing instruction pair in program order.
+ for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+ I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+ for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+ I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ auto A = std::make_pair(&*AI, *I1);
+ auto B = std::make_pair(&*OI, *I2);
+
+ assert(*I1 != *I2);
+ if (*I1 > *I2)
+ std::swap(A, B);
+
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second, Strides);
+ SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+
+ // Gather dependences unless we accumulated MaxDependences
+ // dependences. In that case return as soon as we find the first
+ // unsafe dependence. This puts a limit on this quadratic
+ // algorithm.
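+ // (For a set with n accesses this visits up to n*(n-1)/2 pairs.)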
+ if (RecordDependences) {
+ if (Type != Dependence::NoDep)
+ Dependences.push_back(Dependence(A.second, B.second, Type));
+
+ if (Dependences.size() >= MaxDependences) {
+ RecordDependences = false;
+ Dependences.clear();
+ DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+ }
+ }
+ if (!RecordDependences && !SafeForVectorization)
+ return false;
+ }
+ ++OI;
+ }
+ AI++;
+ }
+ }
+
+ DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
+ return SafeForVectorization;
+}
+
+SmallVector<Instruction *, 4>
+MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
+ MemAccessInfo Access(Ptr, isWrite);
+ auto &IndexVector = Accesses.find(Access)->second;
+
+ SmallVector<Instruction *, 4> Insts;
+ std::transform(IndexVector.begin(), IndexVector.end(),
+ std::back_inserter(Insts),
+ [&](unsigned Idx) { return this->InstMap[Idx]; });
+ return Insts;
+}
+
+const char *MemoryDepChecker::Dependence::DepName[] = {
+ "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
+ "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+
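+// As an illustration (hypothetical instructions), a recorded dependence is
+// printed as the dependence kind followed by the source and destination:
+//   Forward:
+//       store i32 %v, i32* %p ->
+//       %w = load i32, i32* %p
+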
+void MemoryDepChecker::Dependence::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<Instruction *> &Instrs) const {
+ OS.indent(Depth) << DepName[Type] << ":\n";
+ OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
+ OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
+}
+
+bool LoopAccessInfo::canAnalyzeLoop() {
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LAA: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
+
+ // We can only analyze innermost loops.
+ if (!TheLoop->empty()) {
+ DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
+ emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ return false;
+ }
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We only handle bottom-tested loops, i.e. loops in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
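+ // A bottom-tested loop corresponds to the do-while shape, e.g.
+ //   do { body(i); } while (++i < n);
+ // where the latch is also the (sole) exiting block.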
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+ emitAnalysis(LoopAccessReport()
+ << "could not determine number of loop iterations");
+ DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
+ return false;
+ }
+
+ return true;
+}
+
+void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
+
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+
+ // Holds all the different accesses in the loop.
+ unsigned NumReads = 0;
+ unsigned NumReadWrites = 0;
+
+ PtrRtChecking.Pointers.clear();
+ PtrRtChecking.Need = false;
+
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (it->mayReadFromMemory()) {
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the flag. Therefore, it is safe to ignore this read from memory.
+ CallInst *Call = dyn_cast<CallInst>(it);
+ if (Call && getIntrinsicIDForCall(Call, TLI))
+ continue;
+
+ // If the function has an explicit vectorized counterpart, we can safely
+ // assume that it can be vectorized.
+ if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
+ TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
+ continue;
+
+ LoadInst *Ld = dyn_cast<LoadInst>(it);
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(LoopAccessReport(Ld)
+ << "read with atomic ordering or volatile read");
+ DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumLoads++;
+ Loads.push_back(Ld);
+ DepChecker.addAccess(Ld);
+ continue;
+ }
+
+ // Save 'store' instructions. Abort if other instructions write to memory.
+ if (it->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(it);
+ if (!St) {
+ emitAnalysis(LoopAccessReport(&*it) <<
+ "instruction cannot be vectorized");
+ CanVecMem = false;
+ return;
+ }
+ if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(LoopAccessReport(St)
+ << "write with atomic ordering or volatile write");
+ DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumStores++;
+ Stores.push_back(St);
+ DepChecker.addAccess(St);
+ }
+ } // Next instr.
+ } // Next block.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ MemoryDepChecker::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
+ AA, LI, DependentAccesses, PSE);
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = cast<StoreInst>(*I);
+ Value* Ptr = ST->getPointerOperand();
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
+ // If we did *not* see this pointer before, insert it into the read-write
+ // list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr).second) {
+ ++NumReadWrites;
+
+ MemoryLocation Loc = MemoryLocation::get(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addStore(Loc);
+ }
+ }
+
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LAA: The loop is annotated parallel, skipping memory "
+ << "dependency checks.\n");
+ CanVecMem = true;
+ return;
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = cast<LoadInst>(*I);
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it into the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+ // such as A[i] += x, because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ bool IsReadOnlyPtr = false;
+ if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) {
+ ++NumReads;
+ IsReadOnlyPtr = true;
+ }
+
+ MemoryLocation Loc = MemoryLocation::get(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
+ }
+
+ // If we write (or read-write) to a single destination and there are no
+ // other reads in this loop, then it is safe to vectorize.
+ if (NumReadWrites == 1 && NumReads == 0) {
+ DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ // Build dependence sets and check whether we need a runtime pointer bounds
+ // check.
+ Accesses.buildDependenceSets();
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides);
+ if (!CanDoRTIfNeeded) {
+ emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
+ << "the array bounds.\n");
+ CanVecMem = false;
+ return;
+ }
+
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+
+ CanVecMem = true;
+ if (Accesses.isDependencyCheckNeeded()) {
+ DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ CanVecMem = DepChecker.areDepsSafe(
+ DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
+ MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks(DepChecker);
+
+ PtrRtChecking.reset();
+ PtrRtChecking.Need = true;
+
+ auto *SE = PSE.getSE();
+ CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
+
+ // Check that we found the bounds for the pointer.
+ if (!CanDoRTIfNeeded) {
+ emitAnalysis(LoopAccessReport()
+ << "cannot check memory dependencies at runtime");
+ DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ CanVecMem = false;
+ return;
+ }
+
+ CanVecMem = true;
+ }
+ }
+
+ if (CanVecMem)
+ DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
+ << (PtrRtChecking.Need ? "" : " don't")
+ << " need runtime memory checks.\n");
+ else {
+ emitAnalysis(LoopAccessReport() <<
+ "unsafe dependent memory operations in loop");
+ DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ }
+}
+
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
+void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+ assert(!Report && "Multiple reports generated");
+ Report = Message;
+}
+
+bool LoopAccessInfo::isUniform(Value *V) const {
+ return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop));
+}
+
+// FIXME: this function is currently a duplicate of the one in
+// LoopVectorize.cpp.
+static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
+ Instruction *Loc) {
+ if (FirstInst)
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
+}
+
+namespace {
+/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
+};
+} // end anonymous namespace
+
+/// \brief Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop. \return the values for the bounds.
+static PointerBounds
+expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
+ Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
+ const RuntimePointerChecking &PtrRtChecking) {
+ Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
+ return {Ptr, Ptr};
+ } else {
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
+
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
+ }
+}
+
+/// \brief Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
+ Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
+ const RuntimePointerChecking &PtrRtChecking) {
+ SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+ // Here we're relying on the SCEV Expander's cache to only emit code for the
+ // same bounds once.
+ std::transform(
+ PointerChecks.begin(), PointerChecks.end(),
+ std::back_inserter(ChecksWithBounds),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ PointerBounds
+ First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
+ Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
+ return std::make_pair(First, Second);
+ });
+
+ return ChecksWithBounds;
+}
+
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
+ Instruction *Loc,
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
+ const {
+ auto *SE = PSE.getSE();
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto ExpandedChecks =
+ expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking);
+
+ LLVMContext &Ctx = Loc->getContext();
+ Instruction *FirstInst = nullptr;
+ IRBuilder<> ChkBuilder(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+
+ for (const auto &Check : ExpandedChecks) {
+ const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
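+ // E.g. (illustrative) ranges [0, 8) and [16, 24) do not conflict:
+ // 0 <= 24 holds but 16 <= 8 does not.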
+ unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+ unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+ assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+ (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
+ // We have to do this trickery because the IRBuilder might fold the check to a
+ // constant expression in which case there is no Instruction anchored in
+ // the block.
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ FirstInst = getFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
+}
+
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
+ if (!PtrRtChecking.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ return addRuntimeChecks(Loc, PtrRtChecking.getChecks());
+}
+
+LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ const ValueToValueMap &Strides)
+ : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
+ if (canAnalyzeLoop())
+ analyzeLoop(Strides);
+}
+
+void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
+ if (CanVecMem) {
+ if (PtrRtChecking.Need)
+ OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe\n";
+ }
+
+ if (Report)
+ OS.indent(Depth) << "Report: " << Report->str() << "\n";
+
+ if (auto *Dependences = DepChecker.getDependences()) {
+ OS.indent(Depth) << "Dependences:\n";
+ for (auto &Dep : *Dependences) {
+ Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
+ OS << "\n";
+ }
+ } else
+ OS.indent(Depth) << "Too many dependences, not recorded\n";
+
+ // List the pairs of accesses that need run-time checks to prove independence.
+ PtrRtChecking.print(OS, Depth);
+ OS << "\n";
+
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+
+ OS.indent(Depth) << "SCEV assumptions:\n";
+ PSE.getUnionPredicate().print(OS, Depth);
+}
+
+const LoopAccessInfo &
+LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
+ auto &LAI = LoopAccessInfoMap[L];
+
+#ifndef NDEBUG
+ assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
+ "Symbolic strides changed for loop");
+#endif
+
+ if (!LAI) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ LAI =
+ llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides);
+#ifndef NDEBUG
+ LAI->NumSymbolicStrides = Strides.size();
+#endif
+ }
+ return *LAI.get();
+}
+
+void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
+ LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
+
+ ValueToValueMap NoSymbolicStrides;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ OS.indent(2) << L->getHeader()->getName() << ":\n";
+ auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
+ LAI.print(OS, 4);
+ }
+}
+
+bool LoopAccessAnalysis::runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ return false;
+}
+
+void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+
+ AU.setPreservesAll();
+}
+
+char LoopAccessAnalysis::ID = 0;
+static const char laa_name[] = "Loop Access Analysis";
+#define LAA_NAME "loop-accesses"
+
+INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+
+namespace llvm {
+ Pass *createLAAPass() {
+ return new LoopAccessAnalysis();
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 0000000..9ab9eea
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,778 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
+template class llvm::LoopBase<BasicBlock, Loop>;
+template class llvm::LoopInfoBase<BasicBlock, Loop>;
+
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+ cl::desc("Verify loop info (time consuming)"));
+
+// Loop identifier metadata name.
+static const char *const LoopMDName = "llvm.loop";
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant
+///
+bool Loop::isLoopInvariant(const Value *V) const {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return !contains(I);
+ return true; // All non-instructions are loop invariant
+}
+
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
+ return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); });
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+ Instruction *InsertPt) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return makeLoopInvariant(I, Changed, InsertPt);
+ return true; // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
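+/// For example (illustrative): in
+///   for (...) { int tmp = a + b; use(tmp); }
+/// with 'a' and 'b' defined outside the loop, the add can be hoisted into
+/// the preheader, making it trivially loop-invariant.
+///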
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+ Instruction *InsertPt) const {
+ // Test if the value is already loop-invariant.
+ if (isLoopInvariant(I))
+ return true;
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+ if (I->mayReadFromMemory())
+ return false;
+ // EH block instructions are immobile.
+ if (I->isEHPad())
+ return false;
+ // Determine the insertion point, unless one was given.
+ if (!InsertPt) {
+ BasicBlock *Preheader = getLoopPreheader();
+ // Without a preheader, hoisting is not feasible.
+ if (!Preheader)
+ return false;
+ InsertPt = Preheader->getTerminator();
+ }
+ // Don't hoist instructions with loop-variant operands.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+ return false;
+
+ // Hoist.
+ I->moveBefore(InsertPt);
+
+ // There is a possibility of hoisting this instruction above some arbitrary
+ // condition. Any metadata defined on it can be control dependent on this
+ // condition. Conservatively strip it here so that we don't give any wrong
+ // information to the optimizer.
+ I->dropUnknownNonDebugMetadata();
+
+ Changed = true;
+ return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
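+/// In IR, such an induction variable looks like (illustrative value names):
+///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %backedge ]
+///   %iv.next = add i32 %iv, 1
+///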
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
+ "Loop must have at least one backedge!");
+ Backedge = *PI++;
+ if (PI == pred_end(H)) return nullptr; // dead loop
+ Incoming = *PI++;
+ if (PI != pred_end(H)) return nullptr; // multiple backedges?
+
+ if (contains(Incoming)) {
+ if (contains(Backedge))
+ return nullptr;
+ std::swap(Incoming, Backedge);
+ } else if (!contains(Backedge))
+ return nullptr;
+
+ // Loop over all of the PHI nodes, looking for a canonical indvar.
+ for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+ if (CI->isNullValue())
+ if (Instruction *Inc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+ if (Inc->getOpcode() == Instruction::Add &&
+ Inc->getOperand(0) == PN)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+ if (CI->equalsInt(1))
+ return PN;
+ }
+ return nullptr;
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
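+/// (every value defined inside the loop that is used outside of it is used
+/// only by PHI nodes in the loop's exit blocks). E.g., with illustrative
+/// names:
+///   exit:
+///     %v.lcssa = phi i32 [ %v, %latch ]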
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+ for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) {
+ // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+ // optimizations, so for the purposes of checking LCSSA form, we
+ // can ignore them.
+ if (I->getType()->isTokenTy())
+ continue;
+
+ for (Use &U : I->uses()) {
+ Instruction *UI = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = UI->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(UI))
+ UserBB = P->getIncomingBlock(U);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != BB &&
+ !contains(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
+ if (!isLCSSAForm(DT))
+ return false;
+
+ return std::all_of(begin(), end(), [&](const Loop *L) {
+ return L->isRecursivelyLCSSAForm(DT);
+ });
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify pass transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+ // Normal-form loops have a preheader, a single backedge, and all of their
+ // exits have all their predecessors inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// isSafeToClone - Return true if the loop body is safe to clone in practice.
+/// Routines that reform the loop CFG and split edges often fail on indirectbr.
+bool Loop::isSafeToClone() const {
+ // Return false if any loop blocks contain indirectbrs, or there are any calls
+ // to noduplicate functions.
+ for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
+ return false;
+
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+ if (II->cannotDuplicate())
+ return false;
+ // Return false if any loop blocks contain invokes to EH-pads other than
+ // landingpads; we don't know how to split those edges yet.
+ auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI))
+ return false;
+ }
+
+ for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
+ if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
+ if (CI->cannotDuplicate())
+ return false;
+ }
+ if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I))
+ return false;
+ }
+ }
+ return true;
+}
+
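+// The loop ID is a distinct metadata node whose first operand refers back to
+// the node itself; any further operands carry loop hints. For example
+// (illustrative):
+//   br i1 %cond, label %header, label %exit, !llvm.loop !0
+//   !0 = distinct !{!0, !1}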
+MDNode *Loop::getLoopID() const {
+ MDNode *LoopID = nullptr;
+ if (isLoopSimplifyForm()) {
+ LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName);
+ } else {
+ // Go through each predecessor of the loop header and check the
+ // terminator for the metadata.
+ BasicBlock *H = getHeader();
+ for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ MDNode *MD = nullptr;
+
+ // Check if this terminator branches to the loop header.
+ for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) {
+ if (TI->getSuccessor(i) == H) {
+ MD = TI->getMetadata(LoopMDName);
+ break;
+ }
+ }
+ if (!MD)
+ return nullptr;
+
+ if (!LoopID)
+ LoopID = MD;
+ else if (MD != LoopID)
+ return nullptr;
+ }
+ }
+ if (!LoopID || LoopID->getNumOperands() == 0 ||
+ LoopID->getOperand(0) != LoopID)
+ return nullptr;
+ return LoopID;
+}
+
+void Loop::setLoopID(MDNode *LoopID) const {
+ assert(LoopID && "Loop ID should not be null");
+ assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
+
+ if (isLoopSimplifyForm()) {
+ getLoopLatch()->getTerminator()->setMetadata(LoopMDName, LoopID);
+ return;
+ }
+
+ BasicBlock *H = getHeader();
+ for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) {
+ TerminatorInst *TI = (*I)->getTerminator();
+ for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) {
+ if (TI->getSuccessor(i) == H)
+ TI->setMetadata(LoopMDName, LoopID);
+ }
+ }
+}
+
+bool Loop::isAnnotatedParallel() const {
+ MDNode *desiredLoopIdMetadata = getLoopID();
+
+ if (!desiredLoopIdMetadata)
+ return false;
+
+ // The loop branch contains the parallel loop metadata. In order to ensure
+ // that any parallel-loop-unaware optimization pass hasn't added loop-carried
+ // dependencies (thus converted the loop back to a sequential loop), check
+ // that all the memory instructions in the loop contain parallelism metadata
+ // that points to the same unique "loop id metadata" as the loop branch does.
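+ //
+ // For example (illustrative), a memory access in a parallel loop carries
+ //   %v = load i32, i32* %p, !llvm.mem.parallel_loop_access !0
+ // where !0 is the same loop ID attached to the branch via !llvm.loop.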
+ for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) {
+ for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end();
+ II != EE; II++) {
+
+ if (!II->mayReadOrWriteMemory())
+ continue;
+
+ // The memory instruction can refer to the loop identifier metadata
+ // directly or indirectly through another list metadata (in case of
+ // nested parallel loops). The loop identifier metadata refers to
+ // itself so we can check both cases with the same routine.
+ MDNode *loopIdMD =
+ II->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+
+ if (!loopIdMD)
+ return false;
+
+ bool loopIdMDFound = false;
+ for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
+ if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
+ loopIdMDFound = true;
+ break;
+ }
+ }
+
+ if (!loopIdMDFound)
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+ // Each predecessor of each exit block of a normal loop is contained
+ // within the loop.
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+ PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+ if (!contains(*PI))
+ return false;
+ // All the requirements are met.
+ return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+ assert(hasDedicatedExits() &&
+ "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+ SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+ for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+ BasicBlock *current = *BI;
+ switchExitBlocks.clear();
+
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+ // If the block is inside the loop then it is not an exit block.
+ if (contains(*I))
+ continue;
+
+ pred_iterator PI = pred_begin(*I);
+ BasicBlock *firstPred = *PI;
+
+ // Only insert the exit block into the output ExitBlocks vector if the
+ // current basic block is the exit block's first predecessor. This
+ // ensures that the same exit block is not inserted twice into the
+ // ExitBlocks vector.
+ if (current != firstPred)
+ continue;
+
+ // If a terminator has more than two successors, for example SwitchInst,
+ // then it is possible that there are multiple edges from the current block
+ // to one exit block.
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+ ExitBlocks.push_back(*I);
+ continue;
+ }
+
+ // In the case of multiple edges from the current block to an exit block,
+ // collect only one edge in ExitBlocks. Use switchExitBlocks to keep track
+ // of duplicate edges.
+ if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+ == switchExitBlocks.end()) {
+ switchExitBlocks.push_back(*I);
+ ExitBlocks.push_back(*I);
+ }
+ }
+ }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+ SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ if (UniqueExitBlocks.size() == 1)
+ return UniqueExitBlocks[0];
+ return nullptr;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void Loop::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// UnloopUpdater implementation
+//
+
+namespace {
+/// Find the new parent loop for all blocks within the "unloop" whose last
+/// backedge has just been removed.
+class UnloopUpdater {
+ Loop *Unloop;
+ LoopInfo *LI;
+
+ LoopBlocksDFS DFS;
+
+ // Map unloop's immediate subloops to their nearest reachable parents. Nested
+ // loops within these subloops will not change parents. However, an immediate
+ // subloop's new parent will be the nearest loop reachable from either its own
+ // exits *or* any of its nested loop's exits.
+ DenseMap<Loop*, Loop*> SubloopParents;
+
+ // Flag the presence of an irreducible backedge whose destination is a block
+ // directly contained by the original unloop.
+ bool FoundIB;
+
+public:
+ UnloopUpdater(Loop *UL, LoopInfo *LInfo) :
+ Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {}
+
+ void updateBlockParents();
+
+ void removeBlocksFromAncestors();
+
+ void updateSubloopParents();
+
+protected:
+ Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop);
+};
+} // end anonymous namespace
+
+/// updateBlockParents - Update the parent loop for all blocks that are directly
+/// contained within the original "unloop".
+void UnloopUpdater::updateBlockParents() {
+ if (Unloop->getNumBlocks()) {
+ // Perform a post order CFG traversal of all blocks within this loop,
+ // propagating the nearest loop from successors to predecessors.
+ LoopBlocksTraversal Traversal(DFS, LI);
+ for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+ POE = Traversal.end(); POI != POE; ++POI) {
+
+ Loop *L = LI->getLoopFor(*POI);
+ Loop *NL = getNearestLoop(*POI, L);
+
+ if (NL != L) {
+ // For reducible loops, NL is now an ancestor of Unloop.
+ assert((NL != Unloop && (!NL || NL->contains(Unloop))) &&
+ "uninitialized successor");
+ LI->changeLoopFor(*POI, NL);
+ }
+ else {
+ // Or the current block is part of a subloop, in which case its parent
+ // is unchanged.
+ assert((FoundIB || Unloop->contains(L)) && "uninitialized successor");
+ }
+ }
+ }
+ // Each irreducible loop within the unloop induces a round of iteration using
+ // the DFS result cached by Traversal.
+ bool Changed = FoundIB;
+ for (unsigned NIters = 0; Changed; ++NIters) {
+ assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm");
+
+ // Iterate over the postorder list of blocks, propagating the nearest loop
+ // from successors to predecessors as before.
+ Changed = false;
+ for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(),
+ POE = DFS.endPostorder(); POI != POE; ++POI) {
+
+ Loop *L = LI->getLoopFor(*POI);
+ Loop *NL = getNearestLoop(*POI, L);
+ if (NL != L) {
+ assert(NL != Unloop && (!NL || NL->contains(Unloop)) &&
+ "uninitialized successor");
+ LI->changeLoopFor(*POI, NL);
+ Changed = true;
+ }
+ }
+ }
+}
+
+/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below
+/// their new parents.
+void UnloopUpdater::removeBlocksFromAncestors() {
+ // Remove all unloop's blocks (including those in nested subloops) from
+ // ancestors below the new parent loop.
+ for (Loop::block_iterator BI = Unloop->block_begin(),
+ BE = Unloop->block_end(); BI != BE; ++BI) {
+ Loop *OuterParent = LI->getLoopFor(*BI);
+ if (Unloop->contains(OuterParent)) {
+ while (OuterParent->getParentLoop() != Unloop)
+ OuterParent = OuterParent->getParentLoop();
+ OuterParent = SubloopParents[OuterParent];
+ }
+ // Remove blocks from former Ancestors except Unloop itself which will be
+ // deleted.
+ for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent;
+ OldParent = OldParent->getParentLoop()) {
+ assert(OldParent && "new loop is not an ancestor of the original");
+ OldParent->removeBlockFromLoop(*BI);
+ }
+ }
+}
+
+/// updateSubloopParents - Update the parent loop for all subloops directly
+/// nested within unloop.
+void UnloopUpdater::updateSubloopParents() {
+ while (!Unloop->empty()) {
+ Loop *Subloop = *std::prev(Unloop->end());
+ Unloop->removeChildLoop(std::prev(Unloop->end()));
+
+ assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop");
+ if (Loop *Parent = SubloopParents[Subloop])
+ Parent->addChildLoop(Subloop);
+ else
+ LI->addTopLevelLoop(Subloop);
+ }
+}
+
+/// getNearestLoop - Return the nearest parent loop among this block's
+/// successors. If a successor is a subloop header, consider its parent to be
+/// the nearest parent of the subloop's exits.
+///
+/// For subloop blocks, simply update SubloopParents and return NULL.
+Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
+
+ // Initially for blocks directly contained by Unloop, NearLoop == Unloop and
+ // is considered uninitialized.
+ Loop *NearLoop = BBLoop;
+
+ Loop *Subloop = nullptr;
+ if (NearLoop != Unloop && Unloop->contains(NearLoop)) {
+ Subloop = NearLoop;
+ // Find the subloop ancestor that is directly contained within Unloop.
+ while (Subloop->getParentLoop() != Unloop) {
+ Subloop = Subloop->getParentLoop();
+ assert(Subloop && "subloop is not an ancestor of the original loop");
+ }
+ // Get the current nearest parent of the Subloop exits, initially Unloop.
+ NearLoop =
+ SubloopParents.insert(std::make_pair(Subloop, Unloop)).first->second;
+ }
+
+ succ_iterator I = succ_begin(BB), E = succ_end(BB);
+ if (I == E) {
+ assert(!Subloop && "subloop blocks must have a successor");
+ NearLoop = nullptr; // unloop blocks may now exit the function.
+ }
+ for (; I != E; ++I) {
+ if (*I == BB)
+ continue; // self loops are uninteresting
+
+ Loop *L = LI->getLoopFor(*I);
+ if (L == Unloop) {
+ // This successor has not been processed. This path must lead to an
+ // irreducible backedge.
+ assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB");
+ FoundIB = true;
+ }
+ if (L != Unloop && Unloop->contains(L)) {
+ // Successor is in a subloop.
+ if (Subloop)
+ continue; // Branching within subloops. Ignore it.
+
+ // BB branches from the original into a subloop header.
+ assert(L->getParentLoop() == Unloop && "cannot skip into nested loops");
+
+ // Get the current nearest parent of the Subloop's exits.
+ L = SubloopParents[L];
+ // L could be Unloop if the only exit was an irreducible backedge.
+ }
+ if (L == Unloop) {
+ continue;
+ }
+ // Handle critical edges from Unloop into a sibling loop.
+ if (L && !L->contains(Unloop)) {
+ L = L->getParentLoop();
+ }
+ // Remember the nearest parent loop among successors or subloop exits.
+ if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L))
+ NearLoop = L;
+ }
+ if (Subloop) {
+ SubloopParents[Subloop] = NearLoop;
+ return BBLoop;
+ }
+ return NearLoop;
+}
+
+LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+ analyze(DomTree);
+}
+
+void LoopInfo::updateUnloop(Loop *Unloop) {
+ Unloop->markUnlooped();
+
+ // First handle the special case of no parent loop to simplify the algorithm.
+ if (!Unloop->getParentLoop()) {
+ // Since Unloop had no parent, its blocks are no longer in a loop.
+ for (Loop::block_iterator I = Unloop->block_begin(),
+ E = Unloop->block_end();
+ I != E; ++I) {
+
+ // Don't reparent blocks in subloops.
+ if (getLoopFor(*I) != Unloop)
+ continue;
+
+ // Blocks no longer have a parent but are still referenced by Unloop until
+ // the Unloop object is deleted.
+ changeLoopFor(*I, nullptr);
+ }
+
+ // Remove the loop from the top-level LoopInfo object.
+ for (iterator I = begin();; ++I) {
+ assert(I != end() && "Couldn't find loop");
+ if (*I == Unloop) {
+ removeLoop(I);
+ break;
+ }
+ }
+
+ // Move all of the subloops to the top-level.
+ while (!Unloop->empty())
+ addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
+
+ return;
+ }
+
+ // Update the parent loop for all blocks within the loop. Blocks within
+ // subloops will not change parents.
+ UnloopUpdater Updater(Unloop, this);
+ Updater.updateBlockParents();
+
+ // Remove blocks from former ancestor loops.
+ Updater.removeBlocksFromAncestors();
+
+ // Add direct subloops as children in their new parent loop.
+ Updater.updateSubloopParents();
+
+ // Remove unloop from its parent loop.
+ Loop *ParentLoop = Unloop->getParentLoop();
+ for (Loop::iterator I = ParentLoop->begin();; ++I) {
+ assert(I != ParentLoop->end() && "Couldn't find loop");
+ if (*I == Unloop) {
+ ParentLoop->removeChildLoop(I);
+ break;
+ }
+ }
+}
+
+char LoopAnalysis::PassID;
+
+LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
+ // FIXME: Currently we create a LoopInfo from scratch for every function.
+ // This may prove to be too wasteful due to deallocating and re-allocating
+ // memory each time for the underlying map and vector datastructures. At some
+ // point it may prove worthwhile to use a freelist and recycle LoopInfo
+ // objects. I don't want to add that kind of complexity until the scope of
+ // the problem is better understood.
+ LoopInfo LI;
+ LI.analyze(AM->getResult<DominatorTreeAnalysis>(F));
+ return LI;
+}
+
+PreservedAnalyses LoopPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AM->getResult<LoopAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+PrintLoopPass::PrintLoopPass() : OS(dbgs()) {}
+PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner)
+ : OS(OS), Banner(Banner) {}
+
+PreservedAnalyses PrintLoopPass::run(Loop &L) {
+ OS << Banner;
+ for (auto *Block : L.blocks())
+ if (Block)
+ Block->print(OS);
+ else
+ OS << "Printing <null> block";
+ return PreservedAnalyses::all();
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+
+char LoopInfoWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+
+bool LoopInfoWrapperPass::runOnFunction(Function &) {
+ releaseMemory();
+ LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ return false;
+}
+
+void LoopInfoWrapperPass::verifyAnalysis() const {
+ // LoopInfoWrapperPass is a FunctionPass, but verifying every loop in the
+ // function each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop manually
+ // during loop pass sequences.
+ if (VerifyLoopInfo)
+ LI.verify();
+}
+
+void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
+void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
+ LI.print(OS);
+}
+
+//===----------------------------------------------------------------------===//
+// LoopBlocksDFS implementation
+//
+
+/// Traverse the loop blocks and store the DFS result.
+/// Useful for clients that just want the final DFS result and don't need to
+/// visit blocks during the initial traversal.
+void LoopBlocksDFS::perform(LoopInfo *LI) {
+ LoopBlocksTraversal Traversal(*this, LI);
+ for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+ POE = Traversal.end(); POI != POE; ++POI)
+ ; // Iterating is enough: the traversal caches the postorder as it goes.
+}
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
new file mode 100644
index 0000000..dc42473
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,347 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-pass-manager"
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPassWrapper : public LoopPass {
+ PrintLoopPass P;
+
+public:
+ static char ID;
+ PrintLoopPassWrapper() : LoopPass(ID) {}
+ PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner)
+ : LoopPass(ID), P(OS, Banner) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &) override {
+ P.run(*L);
+ return false;
+ }
+};
+
+char PrintLoopPassWrapper::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager()
+ : FunctionPass(ID), PMDataManager() {
+ LI = nullptr;
+ CurrentLoop = nullptr;
+}
+
+// Insert the loop into the loop nest (LoopInfo) and the loop queue (LQ).
+Loop &LPPassManager::addLoop(Loop *ParentLoop) {
+ // Create a new loop. LI will take ownership.
+ Loop *L = new Loop();
+
+ // Insert into the loop nest and the loop queue.
+ if (!ParentLoop) {
+ // This is the top level loop.
+ LI->addTopLevelLoop(L);
+ LQ.push_front(L);
+ return *L;
+ }
+
+ ParentLoop->addChildLoop(L);
+ // Insert L into the loop queue after the parent loop.
+ for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
+ if (*I == L->getParentLoop()) {
+ // deque does not support insert after.
+ ++I;
+ LQ.insert(I, 1, L);
+ break;
+ }
+ }
+ return *L;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+ BasicBlock *To, Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->cloneBasicBlockAnalysis(From, To, L);
+ }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+ ++BI) {
+ Instruction &I = *BI;
+ deleteSimpleAnalysisValue(&I, L);
+ }
+ }
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisValue(V, L);
+ }
+}
+
+/// Invoke deleteAnalysisLoop hook for all passes.
+void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisLoop(L);
+ }
+}
+
+
+// Recursively add the loop and all of its subloops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+ LQ.push_back(L);
+ for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+ // become part of LPPassManager.
+ Info.addRequired<LoopInfoWrapperPass>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+ auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
+ LI = &LIWP.getLoopInfo();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ // Populate the loop queue in reverse program order. There is no clear need to
+ // process sibling loops in either forward or reverse order. There may be some
+ // advantage in deleting uses in a later loop before optimizing the
+ // definitions in an earlier loop. If we find a clear reason to process in
+ // forward order, then a forward variant of LoopPassManager should be created.
+ //
+ // Note that LoopInfo::iterator visits loops in reverse program
+ // order. Here, reverse_iterator gives us a forward order, and the LoopQueue
+ // reverses the order a third time by popping from the back.
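+ //
+ // E.g. with two sibling top-level loops in program order, L1 then L2,
+ // LI's reverse_iterator yields L1, L2, so LQ becomes [L1, L2]; popping
+ // from the back then processes L2 before L1, i.e. reverse program order.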
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ addLoopIntoQueue(*I, LQ);
+
+ if (LQ.empty()) // No loops, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+ I != E; ++I) {
+ Loop *L = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doInitialization(L, *this);
+ }
+ }
+
+ // Walk Loops
+ while (!LQ.empty()) {
+
+ CurrentLoop = LQ.back();
+ // Run all passes on the current Loop.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+ CurrentLoop->getHeader()->getName());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+ TimeRegion PassTimer(getPassTimer(P));
+
+ Changed |= P->runOnLoop(CurrentLoop, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+ CurrentLoop->isUnloop()
+ ? "<deleted>"
+ : CurrentLoop->getHeader()->getName());
+ dumpPreservedSet(P);
+
+ if (CurrentLoop->isUnloop()) {
+ // Notify passes that the loop is being deleted.
+ deleteSimpleAnalysisLoop(CurrentLoop);
+ } else {
+ // Manually check that this loop is still healthy. This is done
+ // instead of relying on LoopInfo::verifyLoop since LoopInfo
+ // is a function pass and it's really expensive to verify every
+ // loop in the function every time. That level of checking can be
+ // enabled with the -verify-loop-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(&LIWP));
+ CurrentLoop->verifyLoop();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+
+ F.getContext().yield();
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P, CurrentLoop->isUnloop()
+ ? "<deleted>"
+ : CurrentLoop->getHeader()->getName(),
+ ON_LOOP_MSG);
+
+ if (CurrentLoop->isUnloop())
+ // Do not run other passes on this loop.
+ break;
+ }
+
+ // If the loop was deleted, release all the loop passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (CurrentLoop->isUnloop()) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_LOOP_MSG);
+ }
+ delete CurrentLoop;
+ }
+
+ // Pop the loop from queue after running all passes.
+ LQ.pop_back();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *P = getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Loop Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintLoopPassWrapper(O, Banner);
+}
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for an LPPassManager if P
+// does not preserve higher level analysis info used by other
+// LPPassManager passes. In such a case, pop LPPassManager from the
+// stack. This will force assignPassManager() to create a new
+// LPPassManager as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ // If this pass is destroying high level information that is used
+ // by other passes that are managed by LPM then do not insert
+ // this pass in current LPM. Use new LPPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find LPPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+ PMS.pop();
+
+ LPPassManager *LPPM;
+ if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+ LPPM = (LPPassManager*)PMS.top();
+ else {
+ // Create new Loop Pass Manager if it does not exist.
+ assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Loop Pass Manager
+ LPPM = new LPPassManager();
+ LPPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(LPPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ Pass *P = LPPM->getAsPass();
+ TPM->schedulePass(P);
+
+ // [4] Push new manager into PMS
+ PMS.push(LPPM);
+ }
+
+ LPPM->add(this);
+}
+
+// Containing function has Attribute::OptimizeNone and transformation
+// passes should skip it.
+bool LoopPass::skipOptnoneFunction(const Loop *L) const {
+ const Function *F = L->getHeader()->getParent();
+ if (F && F->hasFnAttribute(Attribute::OptimizeNone)) {
+ // FIXME: Report this to dbgs() only once per function.
+ DEBUG(dbgs() << "Skipping pass '" << getPassName()
+ << "' in function " << F->getName() << "\n");
+ // FIXME: Delete loop from pass manager's queue?
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 0000000..078cefe
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,168 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MemDepPrinter : public FunctionPass {
+ const Function *F;
+
+ enum DepType {
+ Clobber = 0,
+ Def,
+ NonFuncLocal,
+ Unknown
+ };
+
+ static const char *const DepTypeStr[];
+
+ typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair;
+ typedef std::pair<InstTypePair, const BasicBlock *> Dep;
+ typedef SmallSetVector<Dep, 4> DepSet;
+ typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+ DepSetMap Deps;
+
+ static char ID; // Pass identification, replacement for typeid
+ MemDepPrinter() : FunctionPass(ID) {
+ initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ void releaseMemory() override {
+ Deps.clear();
+ F = nullptr;
+ }
+
+ private:
+ static InstTypePair getInstTypePair(MemDepResult dep) {
+ if (dep.isClobber())
+ return InstTypePair(dep.getInst(), Clobber);
+ if (dep.isDef())
+ return InstTypePair(dep.getInst(), Def);
+ if (dep.isNonFuncLocal())
+ return InstTypePair(dep.getInst(), NonFuncLocal);
+ assert(dep.isUnknown() && "unexpected dependence type");
+ return InstTypePair(dep.getInst(), Unknown);
+ }
+ static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
+ return InstTypePair(inst, type);
+ }
+ };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+ return new MemDepPrinter();
+}
+
+const char *const MemDepPrinter::DepTypeStr[]
+ = {"Clobber", "Def", "NonFuncLocal", "Unknown"};
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+ this->F = &F;
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+ // All this code uses non-const interfaces because MemDep is not
+ // const-friendly, though nothing is actually modified.
+ for (auto &I : instructions(F)) {
+ Instruction *Inst = &I;
+
+ if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+ continue;
+
+ MemDepResult Res = MDA.getDependency(Inst);
+ if (!Res.isNonLocal()) {
+ Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
+ static_cast<BasicBlock *>(nullptr)));
+ } else if (auto CS = CallSite(Inst)) {
+ const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+ MDA.getNonLocalCallDependency(CS);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
+ }
+ } else {
+ SmallVector<NonLocalDepResult, 4> NLDI;
+ assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
+ isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
+ MDA.getNonLocalPointerDependency(Inst, NLDI);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
+ }
+ }
+ }
+
+ return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+ for (const auto &I : instructions(*F)) {
+ const Instruction *Inst = &I;
+
+ DepSetMap::const_iterator DI = Deps.find(Inst);
+ if (DI == Deps.end())
+ continue;
+
+ const DepSet &InstDeps = DI->second;
+
+ for (const auto &I : InstDeps) {
+ const Instruction *DepInst = I.first.getPointer();
+ DepType type = I.first.getInt();
+ const BasicBlock *DepBB = I.second;
+
+ OS << " ";
+ OS << DepTypeStr[type];
+ if (DepBB) {
+ OS << " in block ";
+ DepBB->printAsOperand(OS, /*PrintType=*/false, M);
+ }
+ if (DepInst) {
+ OS << " from: ";
+ DepInst->print(OS);
+ }
+ OS << "\n";
+ }
+
+ Inst->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
new file mode 100644
index 0000000..36f1424
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -0,0 +1,78 @@
+//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MemDerefPrinter : public FunctionPass {
+ SmallVector<Value *, 4> Deref;
+ SmallPtrSet<Value *, 4> DerefAndAligned;
+
+ static char ID; // Pass identification, replacement for typeid
+ MemDerefPrinter() : FunctionPass(ID) {
+ initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnFunction(Function &F) override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void releaseMemory() override {
+ Deref.clear();
+ DerefAndAligned.clear();
+ }
+ };
+}
+
+char MemDerefPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenciblity of pointers in function", false, true)
+INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenciblity of pointers in function", false, true)
+
+FunctionPass *llvm::createMemDerefPrinter() {
+ return new MemDerefPrinter();
+}
+
+bool MemDerefPrinter::runOnFunction(Function &F) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (auto &I: instructions(F)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Value *PO = LI->getPointerOperand();
+ if (isDereferenceablePointer(PO, DL))
+ Deref.push_back(PO);
+ if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+ DerefAndAligned.insert(PO);
+ }
+ }
+ return false;
+}
+
+void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
+ OS << "The following are dereferenceable:\n";
+ for (Value *V: Deref) {
+ V->print(OS);
+ if (DerefAndAligned.count(V))
+ OS << "\t(aligned)";
+ else
+ OS << "\t(unaligned)";
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 0000000..9e896ae
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,790 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "memory-builtins"
+
+enum AllocType : uint8_t {
+ OpNewLike = 1<<0, // allocates; never returns null
+ MallocLike = 1<<1 | OpNewLike, // allocates; may return null
+ CallocLike = 1<<2, // allocates + bzero
+ ReallocLike = 1<<3, // reallocates
+ StrDupLike = 1<<4,
+ AllocLike = MallocLike | CallocLike | StrDupLike,
+ AnyAlloc = AllocLike | ReallocLike
+};
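+// Note that the encoding above is deliberate: MallocLike includes the
+// OpNewLike bit, so a query for MallocLike functions (see getAllocationData
+// below) also matches operator-new-like functions, while a query for
+// OpNewLike alone matches only allocators that never return null.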
+
+struct AllocFnsTy {
+ LibFunc::Func Func;
+ AllocType AllocTy;
+ unsigned char NumParams;
+ // First and Second size parameters (or -1 if unused)
+ signed char FstParam, SndParam;
+};
+
+// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
+// know which functions are nounwind, noalias, nocapture parameters, etc.
+static const AllocFnsTy AllocationFnData[] = {
+ {LibFunc::malloc, MallocLike, 1, 0, -1},
+ {LibFunc::valloc, MallocLike, 1, 0, -1},
+ {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
+ {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long)
+ {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow)
+ {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
+ {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
+ {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
+ {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
+ {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long)
+ {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow)
+ {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
+ {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long)
+ {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow)
+ {LibFunc::calloc, CallocLike, 2, 0, 1},
+ {LibFunc::realloc, ReallocLike, 2, 1, -1},
+ {LibFunc::reallocf, ReallocLike, 2, 1, -1},
+ {LibFunc::strdup, StrDupLike, 1, -1, -1},
+ {LibFunc::strndup, StrDupLike, 2, 1, -1}
+ // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
+};
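+// Reading the table: the calloc entry, for example, says the prototype takes
+// two parameters and that parameters 0 and 1 are both size operands, so the
+// allocation size is their product; malloc marks only parameter 0 as a size
+// operand (SndParam == -1).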
+
+
+static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
+ if (LookThroughBitCast)
+ V = V->stripPointerCasts();
+
+ CallSite CS(const_cast<Value*>(V));
+ if (!CS.getInstruction())
+ return nullptr;
+
+ if (CS.isNoBuiltin())
+ return nullptr;
+
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || !Callee->isDeclaration())
+ return nullptr;
+ return Callee;
+}
+
+/// \brief Returns the allocation data for the given value if it is a call to a
+/// known allocation function, and NULL otherwise.
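+/// Note (illustrative): the prototype check below only accepts functions that
+/// return i8* and take i32/i64 size parameters, e.g. i8* malloc(i64); an
+/// allocator declared with any other prototype is rejected.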
+static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false) {
+ // Skip intrinsics
+ if (isa<IntrinsicInst>(V))
+ return nullptr;
+
+ Function *Callee = getCalledFunction(V, LookThroughBitCast);
+ if (!Callee)
+ return nullptr;
+
+ // Make sure that the function is available.
+ StringRef FnName = Callee->getName();
+ LibFunc::Func TLIFn;
+ if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
+ return nullptr;
+
+ const AllocFnsTy *FnData =
+ std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
+ [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; });
+
+ if (FnData == std::end(AllocationFnData))
+ return nullptr;
+
+ if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
+ return nullptr;
+
+ // Check function prototype.
+ int FstParam = FnData->FstParam;
+ int SndParam = FnData->SndParam;
+ FunctionType *FTy = Callee->getFunctionType();
+
+ if (FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) &&
+ FTy->getNumParams() == FnData->NumParams &&
+ (FstParam < 0 ||
+ (FTy->getParamType(FstParam)->isIntegerTy(32) ||
+ FTy->getParamType(FstParam)->isIntegerTy(64))) &&
+ (SndParam < 0 ||
+ FTy->getParamType(SndParam)->isIntegerTy(32) ||
+ FTy->getParamType(SndParam)->isIntegerTy(64)))
+ return FnData;
+ return nullptr;
+}
+
+static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
+ ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
+ return CS && CS.hasFnAttr(Attribute::NoAlias);
+}
+
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
+/// like).
+bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a function that returns a
+/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
+bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ // it's safe to consider realloc as noalias since accessing the original
+ // pointer is undefined behavior
+ return isAllocationFn(V, TLI, LookThroughBitCast) ||
+ hasNoAliasAttr(V, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates uninitialized memory (such as malloc).
+bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, MallocLike, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates zero-filled memory (such as calloc).
+bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, CallocLike, TLI, LookThroughBitCast);
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory (either malloc, calloc, or strdup like).
+bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, AllocLike, TLI, LookThroughBitCast);
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I,
+ const TargetLibraryInfo *TLI) {
+ return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
+}
+
+static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughSExt = false) {
+ if (!CI)
+ return nullptr;
+
+ // The size of the malloc's result type must be known to determine array size.
+ Type *T = getMallocAllocatedType(CI, TLI);
+ if (!T || !T->isSized())
+ return nullptr;
+
+ unsigned ElementSize = DL.getTypeAllocSize(T);
+ if (StructType *ST = dyn_cast<StructType>(T))
+ ElementSize = DL.getStructLayout(ST)->getSizeInBytes();
+
+ // If malloc call's arg can be determined to be a multiple of ElementSize,
+ // return the multiple. Otherwise, return NULL.
+ Value *MallocArg = CI->getArgOperand(0);
+ Value *Multiple = nullptr;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
+
+ return nullptr;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the call's return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
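+///
+/// For example (illustrative IR):
+///   %call = call i8* @malloc(i64 64)
+///   %p = bitcast i8* %call to i32*
+/// Here the call has exactly one bitcast use, so the result is i32*; with no
+/// bitcast uses the result would be the call's own type, i8*.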
+PointerType *llvm::getMallocType(const CallInst *CI,
+ const TargetLibraryInfo *TLI) {
+ assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
+
+ PointerType *MallocType = nullptr;
+ unsigned NumOfBitCastUses = 0;
+
+ // Determine if CallInst has a bitcast use.
+ for (Value::const_user_iterator UI = CI->user_begin(), E = CI->user_end();
+ UI != E;)
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+ MallocType = cast<PointerType>(BCI->getDestTy());
+ NumOfBitCastUses++;
+ }
+
+ // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+ if (NumOfBitCastUses == 1)
+ return MallocType;
+
+ // Malloc call was not bitcast, so type is the malloc function's return type.
+ if (NumOfBitCastUses == 0)
+ return cast<PointerType>(CI->getType());
+
+ // Type could not be determined.
+ return nullptr;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+/// 0: PointerType is the malloc call's return type.
+/// 1: PointerType is the bitcast's result type.
+/// >1: Unique PointerType cannot be determined, return NULL.
+Type *llvm::getMallocAllocatedType(const CallInst *CI,
+ const TargetLibraryInfo *TLI) {
+ PointerType *PT = getMallocType(CI, TLI);
+ return PT ? PT->getElementType() : nullptr;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple. For non-array mallocs, the multiple is
+/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
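+///
+/// For example (illustrative IR):
+///   %call = call i8* @malloc(i64 400)
+///   %arr = bitcast i8* %call to i32*
+/// The allocated type is i32 (4 bytes on typical targets) and 400 is a
+/// multiple of 4, so the returned array size is the constant 100.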
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughSExt) {
+ assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, DL, TLI, LookThroughSExt);
+}
+
+
+/// extractCallocCall - Returns the corresponding CallInst if the instruction
+/// is a calloc call.
+const CallInst *llvm::extractCallocCall(const Value *I,
+ const TargetLibraryInfo *TLI) {
+ return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
+}
+
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || isa<IntrinsicInst>(CI))
+ return nullptr;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == nullptr)
+ return nullptr;
+
+ StringRef FnName = Callee->getName();
+ LibFunc::Func TLIFn;
+ if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
+ return nullptr;
+
+ unsigned ExpectedNumParams;
+ if (TLIFn == LibFunc::free ||
+ TLIFn == LibFunc::ZdlPv || // operator delete(void*)
+ TLIFn == LibFunc::ZdaPv || // operator delete[](void*)
+ TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*)
+ TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*)
+ TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*)
+ TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*)
+ ExpectedNumParams = 1;
+ else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
+ TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
+ TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
+ TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
+ TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint)
+ TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
+ TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint)
+ TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
+ TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
+ TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
+ ExpectedNumParams = 2;
+ else
+ return nullptr;
+
+ // Check free prototype.
+ // FIXME: workaround for PR5130; this will be obsolete once a nobuiltin
+ // attribute exists.
+ FunctionType *FTy = Callee->getFunctionType();
+ if (!FTy->getReturnType()->isVoidTy())
+ return nullptr;
+ if (FTy->getNumParams() != ExpectedNumParams)
+ return nullptr;
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+ return nullptr;
+
+ return CI;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Utility functions to compute size of objects.
+//
+
+
+/// \brief Compute the size of the object pointed by Ptr. Returns true and the
+/// object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
+/// byval arguments, and global variables.
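+///
+/// A minimal usage sketch (hypothetical caller):
+/// \code
+///   uint64_t Size;
+///   if (getObjectSize(Ptr, Size, DL, TLI, /*RoundToAlign=*/false))
+///     ... use Size ...
+/// \endcode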
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, bool RoundToAlign) {
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
+ SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
+ if (!Visitor.bothKnown(Data))
+ return false;
+
+ APInt ObjSize = Data.first, Offset = Data.second;
+ // check for overflow
+ if (Offset.slt(0) || ObjSize.ult(Offset))
+ Size = 0;
+ else
+ Size = (ObjSize - Offset).getZExtValue();
+ return true;
+}
+
+
+STATISTIC(ObjectVisitorArgument,
+ "Number of arguments with unsolved size and offset");
+STATISTIC(ObjectVisitorLoad,
+ "Number of load instructions with unsolved size and offset");
+
+
+APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
+ if (RoundToAlign && Align)
+ return APInt(IntTyBits, RoundUpToAlignment(Size.getZExtValue(), Align));
+ return Size;
+}
+
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context,
+ bool RoundToAlign)
+ : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ // Pointer size must be rechecked for each object visited since it could have
+ // a different address space.
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
+ IntTyBits = DL.getPointerTypeSizeInBits(V->getType());
+ Zero = APInt::getNullValue(IntTyBits);
+
+ V = V->stripPointerCasts();
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If we have already seen this instruction, bail out. Cycles can happen in
+ // unreachable code after constant propagation.
+ if (!SeenInsts.insert(I).second)
+ return unknown();
+
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ return visitGEPOperator(*GEP);
+ return visit(*I);
+ }
+ if (Argument *A = dyn_cast<Argument>(V))
+ return visitArgument(*A);
+ if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
+ return visitConstantPointerNull(*P);
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return visitGlobalAlias(*GA);
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return visitGlobalVariable(*GV);
+ if (UndefValue *UV = dyn_cast<UndefValue>(V))
+ return visitUndefValue(*UV);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::IntToPtr)
+ return unknown(); // clueless
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ return visitGEPOperator(cast<GEPOperator>(*CE));
+ }
+
+ DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
+ << '\n');
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
+ if (!I.getAllocatedType()->isSized())
+ return unknown();
+
+ APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
+ if (!I.isArrayAllocation())
+ return std::make_pair(align(Size, I.getAlignment()), Zero);
+
+ Value *ArraySize = I.getArraySize();
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
+ Size *= C->getValue().zextOrSelf(IntTyBits);
+ return std::make_pair(align(Size, I.getAlignment()), Zero);
+ }
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
+ // no interprocedural analysis is done at the moment
+ if (!A.hasByValOrInAllocaAttr()) {
+ ++ObjectVisitorArgument;
+ return unknown();
+ }
+ PointerType *PT = cast<PointerType>(A.getType());
+ APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType()));
+ return std::make_pair(align(Size, A.getParamAlignment()), Zero);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
+ const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
+ TLI);
+ if (!FnData)
+ return unknown();
+
+ // handle strdup-like functions separately
+ if (FnData->AllocTy == StrDupLike) {
+ APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
+ if (!Size)
+ return unknown();
+
+ // strndup limits strlen
+ if (FnData->FstParam > 0) {
+ ConstantInt *Arg= dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
+ if (!Arg)
+ return unknown();
+
+ APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
+ if (Size.ugt(MaxSize))
+ Size = MaxSize + 1;
+ }
+ return std::make_pair(Size, Zero);
+ }
+
+ ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
+ if (!Arg)
+ return unknown();
+
+ APInt Size = Arg->getValue().zextOrSelf(IntTyBits);
+ // size determined by just 1 parameter
+ if (FnData->SndParam < 0)
+ return std::make_pair(Size, Zero);
+
+ Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam));
+ if (!Arg)
+ return unknown();
+
+ Size *= Arg->getValue().zextOrSelf(IntTyBits);
+ return std::make_pair(Size, Zero);
+
+ // TODO: handle more standard functions (+ wchar cousins):
+ // - strdup / strndup
+ // - strcpy / strncpy
+ // - strcat / strncat
+ // - memcpy / memmove
+ // - strcat / strncat
+ // - memset
+}
+
+SizeOffsetType
+ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) {
+ return std::make_pair(Zero, Zero);
+}
+
+SizeOffsetType
+ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) {
+ return unknown();
+}
+
+SizeOffsetType
+ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
+ // Easy cases were already folded by previous passes.
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
+ SizeOffsetType PtrData = compute(GEP.getPointerOperand());
+ APInt Offset(IntTyBits, 0);
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
+ return unknown();
+
+ return std::make_pair(PtrData.first, PtrData.second + Offset);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
+ if (GA.mayBeOverridden())
+ return unknown();
+ return compute(GA.getAliasee());
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
+ if (!GV.hasDefinitiveInitializer())
+ return unknown();
+
+ APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
+ return std::make_pair(align(Size, GV.getAlignment()), Zero);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) {
+ // clueless
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
+ ++ObjectVisitorLoad;
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
+ // too complex to analyze statically.
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
+ SizeOffsetType TrueSide = compute(I.getTrueValue());
+ SizeOffsetType FalseSide = compute(I.getFalseValue());
+ if (bothKnown(TrueSide) && bothKnown(FalseSide) && TrueSide == FalseSide)
+ return TrueSide;
+ return unknown();
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
+ return std::make_pair(Zero, Zero);
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
+ DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n');
+ return unknown();
+}
+
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
+ const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
+ bool RoundToAlign)
+ : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
+ // IntTy and Zero must be set for each compute() since the address space may
+ // be different for later objects.
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
+ // XXX - Are vectors of pointers possible here?
+ IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
+ Zero = ConstantInt::get(IntTy, 0);
+
+ SizeOffsetEvalType Result = compute_(V);
+
+ if (!bothKnown(Result)) {
+ // erase everything that was computed in this iteration from the cache, so
+ // that no dangling references are left behind. We could be a bit smarter if
+ // we kept a dependency graph. It's probably not worth the complexity.
+ for (PtrSetTy::iterator I=SeenVals.begin(), E=SeenVals.end(); I != E; ++I) {
+ CacheMapTy::iterator CacheIt = CacheMap.find(*I);
+ // non-computable results can be safely cached
+ if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
+ CacheMap.erase(CacheIt);
+ }
+ }
+
+ SeenVals.clear();
+ return Result;
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
+ SizeOffsetType Const = Visitor.compute(V);
+ if (Visitor.bothKnown(Const))
+ return std::make_pair(ConstantInt::get(Context, Const.first),
+ ConstantInt::get(Context, Const.second));
+
+ V = V->stripPointerCasts();
+
+ // check cache
+ CacheMapTy::iterator CacheIt = CacheMap.find(V);
+ if (CacheIt != CacheMap.end())
+ return CacheIt->second;
+
+ // always generate code immediately before the instruction being
+ // processed, so that the generated code dominates the same BBs
+ BuilderTy::InsertPointGuard Guard(Builder);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Builder.SetInsertPoint(I);
+
+ // now compute the size and offset
+ SizeOffsetEvalType Result;
+
+ // Record the pointers that were handled in this run, so that they can be
+ // cleaned later if something fails. We also use this set to break cycles that
+ // can occur in dead code.
+ if (!SeenVals.insert(V).second) {
+ Result = unknown();
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ Result = visitGEPOperator(*GEP);
+ } else if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Result = visit(*I);
+ } else if (isa<Argument>(V) ||
+ (isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) ||
+ isa<GlobalAlias>(V) ||
+ isa<GlobalVariable>(V)) {
+ // ignore values where we cannot do more than what ObjectSizeVisitor can
+ Result = unknown();
+ } else {
+ DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: "
+ << *V << '\n');
+ Result = unknown();
+ }
+
+ // Don't reuse CacheIt since it may be invalid at this point.
+ CacheMap[V] = Result;
+ return Result;
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
+ if (!I.getAllocatedType()->isSized())
+ return unknown();
+
+ // must be a VLA
+ assert(I.isArrayAllocation());
+ Value *ArraySize = I.getArraySize();
+ Value *Size = ConstantInt::get(ArraySize->getType(),
+ DL.getTypeAllocSize(I.getAllocatedType()));
+ Size = Builder.CreateMul(Size, ArraySize);
+ return std::make_pair(Size, Zero);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
+ const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
+ TLI);
+ if (!FnData)
+ return unknown();
+
+ // handle strdup-like functions separately
+ if (FnData->AllocTy == StrDupLike) {
+ // TODO
+ return unknown();
+ }
+
+ Value *FirstArg = CS.getArgument(FnData->FstParam);
+ FirstArg = Builder.CreateZExt(FirstArg, IntTy);
+ if (FnData->SndParam < 0)
+ return std::make_pair(FirstArg, Zero);
+
+ Value *SecondArg = CS.getArgument(FnData->SndParam);
+ SecondArg = Builder.CreateZExt(SecondArg, IntTy);
+ Value *Size = Builder.CreateMul(FirstArg, SecondArg);
+ return std::make_pair(Size, Zero);
+
+ // TODO: handle more standard functions (+ wchar cousins):
+ // - strdup / strndup
+ // - strcpy / strncpy
+ // - strcat / strncat
+ // - memcpy / memmove
+ // - strcat / strncat
+ // - memset
+}
+
+SizeOffsetEvalType
+ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) {
+ return unknown();
+}
+
+SizeOffsetEvalType
+ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) {
+ return unknown();
+}
+
+SizeOffsetEvalType
+ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
+ SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand());
+ if (!bothKnown(PtrData))
+ return unknown();
+
+ Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true);
+ Offset = Builder.CreateAdd(PtrData.second, Offset);
+ return std::make_pair(PtrData.first, Offset);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) {
+ // clueless
+ return unknown();
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) {
+ return unknown();
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
+ // create 2 PHIs: one for size and another for offset
+ PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues());
+ PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues());
+
+ // insert right away in the cache to handle recursive PHIs
+ CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI);
+
+ // compute offset/size for each PHI incoming pointer
+ for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) {
+ Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt());
+ SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
+
+ if (!bothKnown(EdgeData)) {
+ OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
+ OffsetPHI->eraseFromParent();
+ SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
+ SizePHI->eraseFromParent();
+ return unknown();
+ }
+ SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
+ OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
+ }
+
+ Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp;
+ if ((Tmp = SizePHI->hasConstantValue())) {
+ Size = Tmp;
+ SizePHI->replaceAllUsesWith(Size);
+ SizePHI->eraseFromParent();
+ }
+ if ((Tmp = OffsetPHI->hasConstantValue())) {
+ Offset = Tmp;
+ OffsetPHI->replaceAllUsesWith(Offset);
+ OffsetPHI->eraseFromParent();
+ }
+ return std::make_pair(Size, Offset);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) {
+ SizeOffsetEvalType TrueSide = compute_(I.getTrueValue());
+ SizeOffsetEvalType FalseSide = compute_(I.getFalseValue());
+
+ if (!bothKnown(TrueSide) || !bothKnown(FalseSide))
+ return unknown();
+ if (TrueSide == FalseSide)
+ return TrueSide;
+
+ Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first,
+ FalseSide.first);
+ Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second,
+ FalseSide.second);
+ return std::make_pair(Size, Offset);
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) {
+ DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I <<'\n');
+ return unknown();
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 0000000..6918360
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1756 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on. It builds on
+// alias analysis information, and tries to provide a lazy, caching interface to
+// a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "memdep"
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+ "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+ "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+ "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+ "Number of block queries that were completely cached");
+
+// Limit for the number of instructions to scan in a block.
+
+static cl::opt<unsigned> BlockScanLimit(
+ "memdep-block-scan-limit", cl::Hidden, cl::init(100),
+ cl::desc("The number of instructions to scan in a block in memory "
+ "dependency analysis (default = 100)"));
+
+// Limit on the number of memdep results to process.
+static const unsigned int NumResultsLimit = 100;
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+ : FunctionPass(ID) {
+ initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+ LocalDeps.clear();
+ NonLocalDeps.clear();
+ NonLocalPointerDeps.clear();
+ ReverseLocalDeps.clear();
+ ReverseNonLocalDeps.clear();
+ ReverseNonLocalPtrDeps.clear();
+ PredCache.clear();
+}
+
+/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequiredTransitive<AAResultsWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+ SmallPtrSet<KeyTy, 4> > &ReverseMap,
+ Instruction *Inst, KeyTy Val) {
+ typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+ InstIt = ReverseMap.find(Inst);
+ assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+ bool Found = InstIt->second.erase(Val);
+ assert(Found && "Invalid reverse map!"); (void)Found;
+ if (InstIt->second.empty())
+ ReverseMap.erase(InstIt);
+}
+
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
+ const TargetLibraryInfo &TLI) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isUnordered()) {
+ Loc = MemoryLocation::get(LI);
+ return MRI_Ref;
+ }
+ if (LI->getOrdering() == Monotonic) {
+ Loc = MemoryLocation::get(LI);
+ return MRI_ModRef;
+ }
+ Loc = MemoryLocation();
+ return MRI_ModRef;
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isUnordered()) {
+ Loc = MemoryLocation::get(SI);
+ return MRI_Mod;
+ }
+ if (SI->getOrdering() == Monotonic) {
+ Loc = MemoryLocation::get(SI);
+ return MRI_ModRef;
+ }
+ Loc = MemoryLocation();
+ return MRI_ModRef;
+ }
+
+ if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Loc = MemoryLocation::get(V);
+ return MRI_ModRef;
+ }
+
+ if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
+ // calls to free() deallocate the entire structure
+ Loc = MemoryLocation(CI->getArgOperand(0));
+ return MRI_Mod;
+ }
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ AAMDNodes AAInfo;
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ II->getAAMetadata(AAInfo);
+ Loc = MemoryLocation(
+ II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return MRI_Mod;
+ case Intrinsic::invariant_end:
+ II->getAAMetadata(AAInfo);
+ Loc = MemoryLocation(
+ II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return MRI_Mod;
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, just do the coarse-grained thing that always works.
+ if (Inst->mayWriteToMemory())
+ return MRI_ModRef;
+ if (Inst->mayReadFromMemory())
+ return MRI_Ref;
+ return MRI_NoModRef;
+}
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
+ // Walk backwards through the block, looking for dependencies
+ while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ Instruction *Inst = &*--ScanIt;
+
+ // If this inst is a memory op, get the pointer it accessed
+ MemoryLocation Loc;
+ ModRefInfo MR = GetLocation(Inst, Loc, *TLI);
+ if (Loc.Ptr) {
+ // A simple instruction.
+ if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef)
+ return MemDepResult::getClobber(Inst);
+ continue;
+ }
+
+ if (auto InstCS = CallSite(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ // If these two calls do not interfere, look past it.
+ switch (AA->getModRefInfo(CS, InstCS)) {
+ case MRI_NoModRef:
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && !(MR & MRI_Mod) &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
+ continue;
+ default:
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // If we could not obtain a pointer for the instruction and the instruction
+ // touches memory then assume that this is a dependency.
+ if (MR != MRI_NoModRef)
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // No dependence found. If this is the entry block of the function, the
+ // dependence is not function-local; otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getNonFuncLocal();
+}
+
+/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
+/// would fully overlap MemLoc if done as a wider legal integer load.
+///
+/// MemLocBase and MemLocOffs are computed lazily here, the first time the
+/// base/offset of MemLoc is needed.
+static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
+ const Value *&MemLocBase,
+ int64_t &MemLocOffs,
+ const LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
+ // If we haven't already computed the base/offset of MemLoc, do so now.
+ if (!MemLocBase)
+ MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
+
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ MemLocBase, MemLocOffs, MemLoc.Size, LI);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, MemLocOffs,
+/// and MemLocSize) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
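+///
+/// For example (a sketch): given an i8 load of P+1 whose address is known to
+/// be 4-byte aligned, and a query for the i8 at P+3, the load can be widened
+/// to an i32 covering bytes P+1..P+4, so this function returns 4.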
+unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
+ const LoadInst *LI) {
+ // We can only extend simple integer loads.
+ if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
+
+ // Load widening is hostile to ThreadSanitizer: it may cause false positives
+ // or make the reports more cryptic (access sizes are wrong).
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
+ return 0;
+
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
+ // Get the base of this load.
+ int64_t LIOffs = 0;
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
+
+ // If the two pointers are not based on the same pointer, we can't tell that
+ // they are related.
+ if (LIBase != MemLocBase) return 0;
+
+ // Okay, the two values are based on the same pointer, but returned as
+ // no-alias. This happens when we have things like two byte loads at "P+1"
+ // and "P+3". Check to see if increasing the size of the "LI" load up to its
+ // alignment (or the largest native integer type) will allow us to load all
+ // the bits required by MemLoc.
+
+ // If MemLoc is before LI, then no widening of LI will help us out.
+ if (MemLocOffs < LIOffs) return 0;
+
+ // Get the alignment of the load in bytes. We assume that it is safe to load
+ // any legal integer up to this size without a problem. For example, if we're
+ // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+ // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
+ // to i16.
+ unsigned LoadAlign = LI->getAlignment();
+
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
+
+ // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
+
+ // This is the size of the load to try. Start with the next larger power of
+ // two.
+ unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
+ NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+
+ while (1) {
+ // If this load size is bigger than our known alignment or would not fit
+ // into a native integer register, then we fail.
+ if (NewLoadByteSize > LoadAlign ||
+ !DL.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
+
+ if (LIOffs + NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+
+ // If a load of this width would include all of MemLoc, then we succeed.
+ if (LIOffs+NewLoadByteSize >= MemLocEnd)
+ return NewLoadByteSize;
+
+ NewLoadByteSize <<= 1;
+ }
+}
+
+static bool isVolatile(Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->isVolatile();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->isVolatile();
+ else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return AI->isVolatile();
+ return false;
+}
+
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations. If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations. If possible, pass the query
+/// instruction as well; this function may take advantage of the metadata
+/// annotated to the query instruction to refine the result.
+MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
+ const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
+ BasicBlock *BB, Instruction *QueryInst) {
+
+ if (QueryInst != nullptr) {
+ if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
+ MemDepResult invariantGroupDependency =
+ getInvariantGroupPointerDependency(LI, BB);
+
+ if (invariantGroupDependency.isDef())
+ return invariantGroupDependency;
+ }
+ }
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+}
+
+MemDepResult
+MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI,
+ BasicBlock *BB) {
+ Value *LoadOperand = LI->getPointerOperand();
+ // It is not safe to walk the use list of a global value, because function
+ // passes aren't allowed to look outside their functions.
+ if (isa<GlobalValue>(LoadOperand))
+ return MemDepResult::getUnknown();
+
+ auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
+ if (!InvariantGroupMD)
+ return MemDepResult::getUnknown();
+
+ MemDepResult Result = MemDepResult::getUnknown();
+ llvm::SmallSet<Value *, 14> Seen;
+ // Queue to process all pointers that are equivalent to load operand.
+ llvm::SmallVector<Value *, 8> LoadOperandsQueue;
+ LoadOperandsQueue.push_back(LoadOperand);
+ while (!LoadOperandsQueue.empty()) {
+ Value *Ptr = LoadOperandsQueue.pop_back_val();
+ if (isa<GlobalValue>(Ptr))
+ continue;
+
+ if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
+ if (!Seen.count(BCI->getOperand(0))) {
+ LoadOperandsQueue.push_back(BCI->getOperand(0));
+ Seen.insert(BCI->getOperand(0));
+ }
+ }
+
+ for (Use &Us : Ptr->uses()) {
+ auto *U = dyn_cast<Instruction>(Us.getUser());
+ if (!U || U == LI || !DT->dominates(U, LI))
+ continue;
+
+ if (auto *BCI = dyn_cast<BitCastInst>(U)) {
+ if (!Seen.count(BCI)) {
+ LoadOperandsQueue.push_back(BCI);
+ Seen.insert(BCI);
+ }
+ continue;
+ }
+ // If we hit a load/store with the same invariant.group metadata (and
+ // the same pointer operand), we can assume that the value pointed to by
+ // the pointer operand didn't change.
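+ // (Illustrative: an earlier load of the same pointer in this block,
+ // tagged with the same !invariant.group metadata, becomes a Def here.)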
+ if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
+ U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
+ return MemDepResult::getDef(U);
+ }
+ }
+ return Result;
+}
+
+MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
+ const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
+ BasicBlock *BB, Instruction *QueryInst) {
+
+ const Value *MemLocBase = nullptr;
+ int64_t MemLocOffset = 0;
+ unsigned Limit = BlockScanLimit;
+ bool isInvariantLoad = false;
+
+ // We must be careful with atomic accesses, as they may allow another thread
+  // to touch this location, clobbering it. We are conservative: if the
+ // QueryInst is not a simple (non-atomic) memory access, we automatically
+ // return getClobber.
+ // If it is simple, we know based on the results of
+ // "Compiler testing via a theory of sound optimisations in the C11/C++11
+ // memory model" in PLDI 2013, that a non-atomic location can only be
+ // clobbered between a pair of a release and an acquire action, with no
+ // access to the location in between.
+ // Here is an example for giving the general intuition behind this rule.
+ // In the following code:
+ // store x 0;
+ // release action; [1]
+ // acquire action; [4]
+ // %val = load x;
+ // It is unsafe to replace %val by 0 because another thread may be running:
+ // acquire action; [2]
+ // store x 42;
+ // release action; [3]
+ // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+  // being 42. A key property of this program, however, is that if either
+  // 1 or 4 were missing, there would be a race between the store of 42 and
+  // either the store of 0 or the load (making the whole program racy).
+ // The paper mentioned above shows that the same property is respected
+ // by every program that can detect any optimisation of that kind: either
+ // it is racy (undefined) or there is a release followed by an acquire
+ // between the pair of accesses under consideration.
+
+ // If the load is invariant, we "know" that it doesn't alias *any* write. We
+ // do want to respect mustalias results since defs are useful for value
+ // forwarding, but any mayalias write can be assumed to be noalias.
+ // Arguably, this logic should be pushed inside AliasAnalysis itself.
+ if (isLoad && QueryInst) {
+ LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
+ if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ isInvariantLoad = true;
+ }
+
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
+ // Create a numbered basic block to lazily compute and cache instruction
+ // positions inside a BB. This is used to provide fast queries for relative
+ // position between two instructions in a BB and can be used by
+ // AliasAnalysis::callCapturesBefore.
+ OrderedBasicBlock OBB(BB);
+
+ // Walk backwards through the basic block, looking for dependencies.
+ while (ScanIt != BB->begin()) {
+ Instruction *Inst = &*--ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ // Debug intrinsics don't (and can't) cause dependencies.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // If we reach a lifetime begin or end marker, then the query ends here
+ // because the value is undefined.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point (the right approach is to use
+ // GetPointerBaseWithConstantOffset).
+ if (AA->isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
+ return MemDepResult::getDef(II);
+ continue;
+ }
+ }
+
+    // Values depend on loads if the pointers are must aliased. This means
+    // that a load depends on another must aliased load from the same value.
+    // One exception is atomic loads: a value can depend on an atomic load it
+    // does not alias when that atomic load indicates that another thread may
+    // be accessing the location.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+
+      // While volatile accesses cannot be eliminated, they do not have to
+      // clobber non-aliasing locations; normal accesses, for example, can be
+      // safely reordered with volatile accesses.
+ if (LI->isVolatile()) {
+ if (!QueryInst)
+ // Original QueryInst *may* be volatile
+ return MemDepResult::getClobber(LI);
+ if (isVolatile(QueryInst))
+ // Ordering required if QueryInst is itself volatile
+ return MemDepResult::getClobber(LI);
+ // Otherwise, volatile doesn't imply any special ordering
+ }
+
+ // Atomic loads have complications involved.
+ // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+ // FIXME: This is overly conservative.
+ if (LI->isAtomic() && LI->getOrdering() > Unordered) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(LI);
+ if (LI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(LI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(LI);
+ }
+ }
+
+ MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasResult R = AA->alias(LoadLoc, MemLoc);
+
+ if (isLoad) {
+ if (R == NoAlias) {
+ // If this is an over-aligned integer load (for example,
+ // "load i8* %P, align 4") see if it would obviously overlap with the
+ // queried location if widened to a larger load (e.g. if the queried
+ // location is 1 byte at P+1). If so, return it as a load/load
+ // clobber result, allowing the client to decide to widen the load if
+ // it wants to.
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
+ isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
+ MemLocOffset, LI))
+ return MemDepResult::getClobber(Inst);
+ }
+ continue;
+ }
+
+ // Must aliased loads are defs of each other.
+ if (R == MustAlias)
+ return MemDepResult::getDef(Inst);
+
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+ // in terms of clobbering loads, but since it does this by looking
+ // at the clobbering load directly, it doesn't know about any
+ // phi translation that may have happened along the way.
+
+ // If we have a partial alias, then return this as a clobber for the
+ // client to handle.
+ if (R == PartialAlias)
+ return MemDepResult::getClobber(Inst);
+#endif
+
+        // Otherwise, two may-aliased loads don't create a dependence on each
+        // other; keep scanning.
+ continue;
+ }
+
+      // Stores don't depend on non-aliasing accesses.
+ if (R == NoAlias)
+ continue;
+
+ // Stores don't alias loads from read-only memory.
+ if (AA->pointsToConstantMemory(LoadLoc))
+ continue;
+
+ // Stores depend on may/must aliased loads.
+ return MemDepResult::getDef(Inst);
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Atomic stores have complications involved.
+ // A Monotonic store is OK if the query inst is itself not atomic.
+ // FIXME: This is overly conservative.
+ if (!SI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(SI);
+ if (SI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(SI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(SI);
+ }
+ }
+
+      // FIXME: this is overly conservative.
+      // While volatile accesses cannot be eliminated, they do not have to
+      // clobber non-aliasing locations; normal accesses, for example, can be
+      // reordered with volatile accesses.
+ if (SI->isVolatile())
+ return MemDepResult::getClobber(SI);
+
+ // If alias analysis can tell that this store is guaranteed to not modify
+ // the query pointer, ignore it. Use getModRefInfo to handle cases where
+ // the query pointer points to constant memory etc.
+ if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef)
+ continue;
+
+ // Ok, this store might clobber the query pointer. Check to see if it is
+ // a must alias: in this case, we want to return this as a def.
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+
+ // If we found a pointer, check if it could be the same as our pointer.
+ AliasResult R = AA->alias(StoreLoc, MemLoc);
+
+ if (R == NoAlias)
+ continue;
+ if (R == MustAlias)
+ return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
+ return MemDepResult::getClobber(Inst);
+ }
+
+ // If this is an allocation, and if we know that the accessed pointer is to
+ // the allocation, return Def. This means that there is no dependence and
+ // the access can be optimized based on that. For example, a load could
+ // turn into undef.
+ // Note: Only determine this to be a malloc if Inst is the malloc call, not
+ // a subsequent bitcast of the malloc call result. There can be stores to
+ // the malloced memory between the malloc call and its bitcast uses, and we
+ // need to continue scanning until the malloc call.
+ if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
+ const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
+
+ if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
+ return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
+      // Be conservative if the accessed pointer may alias the allocation;
+      // fall back to the generic handling below.
+ if ((AA->alias(Inst, AccessPtr) == NoAlias) &&
+ // If the allocation is not aliased and does not read memory (like
+ // strdup), it is safe to ignore.
+ (isa<AllocaInst>(Inst) || isMallocLikeFn(Inst, TLI) ||
+ isCallocLikeFn(Inst, TLI)))
+ continue;
+ }
+
+ if (isInvariantLoad)
+ continue;
+
+ // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+ ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc);
+ // If necessary, perform additional analysis.
+ if (MR == MRI_ModRef)
+ MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB);
+ switch (MR) {
+ case MRI_NoModRef:
+ // If the call has no effect on the queried pointer, just ignore it.
+ continue;
+ case MRI_Mod:
+ return MemDepResult::getClobber(Inst);
+ case MRI_Ref:
+ // If the call is known to never store to the pointer, and if this is a
+ // load query, we can safely ignore it (scan past it).
+ if (isLoad)
+ continue;
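+      // Otherwise, fall through to the conservative clobber result below.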
+ default:
+ // Otherwise, there is a potential dependence. Return a clobber.
+ return MemDepResult::getClobber(Inst);
+ }
+ }
+
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (BB != &BB->getParent()->getEntryBlock())
+ return MemDepResult::getNonLocal();
+ return MemDepResult::getNonFuncLocal();
+}
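+
+// For illustration, querying the load in this hypothetical IR:
+//   store i32 1, i32* %p
+//   %v = load i32, i32* %p
+// yields getDef(store) because the two locations must-alias; a may-aliased
+// store would instead be reported as getClobber, and a block containing no
+// aliasing access at all reports getNonLocal (or getNonFuncLocal in the
+// entry block).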
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+ Instruction *ScanPos = QueryInst;
+
+ // Check for a cached result
+ MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+ // If the cached entry is non-dirty, just return it. Note that this depends
+ // on MemDepResult's default constructing to 'dirty'.
+ if (!LocalCache.isDirty())
+ return LocalCache;
+
+ // Otherwise, if we have a dirty entry, we know we can start the scan at that
+ // instruction, which may save us some work.
+ if (Instruction *Inst = LocalCache.getInst()) {
+ ScanPos = Inst;
+
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+ }
+
+ BasicBlock *QueryParent = QueryInst->getParent();
+
+ // Do the scan.
+ if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+ LocalCache = MemDepResult::getNonLocal();
+ else
+ LocalCache = MemDepResult::getNonFuncLocal();
+ } else {
+ MemoryLocation MemLoc;
+ ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI);
+ if (MemLoc.Ptr) {
+ // If we can do a pointer scan, make it happen.
+ bool isLoad = !(MR & MRI_Mod);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
+
+ LocalCache = getPointerDependencyFrom(
+ MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+ CallSite QueryCS(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(
+ QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
+ } else
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getUnknown();
+ }
+
+ // Remember the result!
+ if (Instruction *I = LocalCache.getInst())
+ ReverseLocalDeps[I].insert(QueryInst);
+
+ return LocalCache;
+}
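+
+// A typical client query might look like the following (hypothetical
+// sketch; MD is a MemoryDependenceAnalysis instance):
+//   MemDepResult Res = MD->getDependency(LoadI);
+//   if (Res.isDef())
+//     ... // e.g. forward the defining store's value into LoadI
+//   else if (Res.isNonLocal())
+//     ... // fall back to the non-local queries below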
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ int Count = -1) {
+ if (Count == -1) Count = Cache.size();
+ assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
+ "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across. The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed. Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+ assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ NonLocalDepInfo &Cache = CacheP.first;
+
+  // DirtyBlocks - This is the set of blocks that need to be recomputed. In
+  // the cached case, this can happen due to instructions being deleted etc. In
+  // the uncached case, this starts out as the set of predecessors we care
+  // about.
+ SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+ if (!Cache.empty()) {
+ // Okay, we have a cache entry. If we know it is not dirty, just return it
+ // with no computation.
+ if (!CacheP.second) {
+ ++NumCacheNonLocal;
+ return Cache;
+ }
+
+ // If we already have a partially computed set of results, scan them to
+ // determine what is dirty, seeding our initial DirtyBlocks worklist.
+ for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+ I != E; ++I)
+ if (I->getResult().isDirty())
+ DirtyBlocks.push_back(I->getBB());
+
+ // Sort the cache so that we can do fast binary search lookups below.
+ std::sort(Cache.begin(), Cache.end());
+
+ ++NumCacheDirtyNonLocal;
+ //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+ // << Cache.size() << " cached: " << *QueryInst;
+ } else {
+ // Seed DirtyBlocks with each of the preds of QueryInst's block.
+ BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ for (BasicBlock *Pred : PredCache.get(QueryBB))
+ DirtyBlocks.push_back(Pred);
+ ++NumUncacheNonLocal;
+ }
+
+ // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+ bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+ SmallPtrSet<BasicBlock*, 64> Visited;
+
+ unsigned NumSortedEntries = Cache.size();
+ DEBUG(AssertSorted(Cache));
+
+ // Iterate while we still have blocks to update.
+ while (!DirtyBlocks.empty()) {
+ BasicBlock *DirtyBB = DirtyBlocks.back();
+ DirtyBlocks.pop_back();
+
+ // Already processed this block?
+ if (!Visited.insert(DirtyBB).second)
+ continue;
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ DEBUG(AssertSorted(Cache, NumSortedEntries));
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+ NonLocalDepEntry(DirtyBB));
+ if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = nullptr;
+ if (Entry != Cache.begin()+NumSortedEntries &&
+ Entry->getBB() == DirtyBB) {
+ // If we already have an entry, and if it isn't already dirty, the block
+ // is done.
+ if (!Entry->getResult().isDirty())
+ continue;
+
+ // Otherwise, remember this slot so we can update the value.
+ ExistingResult = &*Entry;
+ }
+
+ // If the dirty entry has a pointer, start scanning from it so we don't have
+ // to rescan the entire block.
+ BasicBlock::iterator ScanPos = DirtyBB->end();
+ if (ExistingResult) {
+ if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+ ScanPos = Inst->getIterator();
+ // We're removing QueryInst's use of Inst.
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+ QueryCS.getInstruction());
+ }
+ }
+
+ // Find out if this block has a local dependency for QueryInst.
+ MemDepResult Dep;
+
+ if (ScanPos != DirtyBB->begin()) {
+      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos,
+                                      DirtyBB);
+    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+      // No dependence found. If this is the entry block of the function, it
+      // is unknown, otherwise it is non-local.
+      Dep = MemDepResult::getNonLocal();
+ } else {
+ Dep = MemDepResult::getNonFuncLocal();
+ }
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the association!
+ if (!Dep.isNonLocal()) {
+ // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+ // update this when we remove instructions.
+ if (Instruction *Inst = Dep.getInst())
+ ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ } else {
+
+ // If the block *is* completely transparent to the load, we need to check
+ // the predecessors of this block. Add them to our worklist.
+ for (BasicBlock *Pred : PredCache.get(DirtyBB))
+ DirtyBlocks.push_back(Pred);
+ }
+ }
+
+ return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(Instruction *QueryInst,
+ SmallVectorImpl<NonLocalDepResult> &Result) {
+ const MemoryLocation Loc = MemoryLocation::get(QueryInst);
+ bool isLoad = isa<LoadInst>(QueryInst);
+ BasicBlock *FromBB = QueryInst->getParent();
+ assert(FromBB);
+
+ assert(Loc.Ptr->getType()->isPointerTy() &&
+ "Can't get pointer deps of a non-pointer!");
+ Result.clear();
+
+ // This routine does not expect to deal with volatile instructions.
+ // Doing so would require piping through the QueryInst all the way through.
+ // TODO: volatiles can't be elided, but they can be reordered with other
+ // non-volatile accesses.
+
+ // We currently give up on any instruction which is ordered, but we do handle
+ // atomic instructions which are unordered.
+ // TODO: Handle ordered instructions
+ auto isOrdered = [](Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return !LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return !SI->isUnordered();
+ }
+ return false;
+ };
+ if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+ return;
+ }
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
+
+ // This is the set of blocks we've inspected, and the pointer we consider in
+ // each block. Because of critical edges, we currently bail out if querying
+ // a block with multiple different pointers. This can happen during PHI
+ // translation.
+ DenseMap<BasicBlock*, Value*> Visited;
+ if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
+ Result, Visited, true))
+ return;
+ Result.clear();
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+}
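+
+// Hypothetical usage sketch:
+//   SmallVector<NonLocalDepResult, 64> Deps;
+//   MD->getNonLocalPointerDependency(LoadI, Deps);
+// Each returned entry pairs a basic block with the def/clobber (or unknown)
+// result found for the (possibly phi-translated) pointer in that block.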
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available). If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(
+ Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
+ BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+ NonLocalDepEntry(BB));
+ if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+ --Entry;
+
+ NonLocalDepEntry *ExistingResult = nullptr;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+ ExistingResult = &*Entry;
+
+ // If we have a cached entry, and it is non-dirty, use it as the value for
+ // this dependency.
+ if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+ ++NumCacheNonLocalPtr;
+ return ExistingResult->getResult();
+ }
+
+ // Otherwise, we have to scan for the value. If we have a dirty cache
+ // entry, start scanning from its position, otherwise we scan from the end
+ // of the block.
+ BasicBlock::iterator ScanPos = BB->end();
+ if (ExistingResult && ExistingResult->getResult().getInst()) {
+ assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+ "Instruction invalidated?");
+ ++NumCacheDirtyNonLocalPtr;
+ ScanPos = ExistingResult->getResult().getInst()->getIterator();
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
+ } else {
+ ++NumUncacheNonLocalPtr;
+ }
+
+ // Scan the block for the dependency.
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
+ QueryInst);
+
+ // If we had a dirty entry for the block, update it. Otherwise, just add
+ // a new entry.
+ if (ExistingResult)
+ ExistingResult->setResult(Dep);
+ else
+ Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+ // If the block has a dependency (i.e. it isn't completely transparent to
+ // the value), remember the reverse association because we just added it
+ // to Cache!
+ if (!Dep.isDef() && !Dep.isClobber())
+ return Dep;
+
+ // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+ // update MemDep when we remove instructions.
+ Instruction *Inst = Dep.getInst();
+ assert(Inst && "Didn't depend on anything?");
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+ ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+ return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered. This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+ unsigned NumSortedEntries) {
+ switch (Cache.size() - NumSortedEntries) {
+ case 0:
+ // done, no new entries.
+ break;
+ case 2: {
+ // Two new entries, insert the last one into place.
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+ Cache.insert(Entry, Val);
+ // FALL THROUGH.
+ }
+ case 1:
+    // One new entry, just insert the new value at the appropriate position.
+ if (Cache.size() != 1) {
+ NonLocalDepEntry Val = Cache.back();
+ Cache.pop_back();
+ MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+ std::upper_bound(Cache.begin(), Cache.end(), Val);
+ Cache.insert(Entry, Val);
+ }
+ break;
+ default:
+ // Added many values, do a full scale sort.
+ std::sort(Cache.begin(), Cache.end());
+ break;
+ }
+}
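+
+// The 1- and 2-entry fast paths above keep the common "append a result or
+// two" case at one or two binary searches plus vector insertions; only bulk
+// additions pay for a full std::sort of the cache.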
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true). In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason. This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
+ Instruction *QueryInst, const PHITransAddr &Pointer,
+ const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
+ SmallVectorImpl<NonLocalDepResult> &Result,
+ DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
+ // Look up the cached info for Pointer.
+ ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+ // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+ // CacheKey, this value will be inserted as the associated value. Otherwise,
+ // it'll be ignored, and we'll have to check to see if the cached size and
+ // aa tags are consistent with the current query.
+ NonLocalPointerInfo InitialNLPI;
+ InitialNLPI.Size = Loc.Size;
+ InitialNLPI.AATags = Loc.AATags;
+
+ // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+ // already have one.
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+ NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+ // If we already have a cache entry for this CacheKey, we may need to do some
+ // work to reconcile the cache entry and the current query.
+ if (!Pair.second) {
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(QueryInst, Pointer,
+ Loc.getWithNewSize(CacheInfo->Size),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+
+ // If the query's AATags are inconsistent with the cached one,
+ // conservatively throw out the cached data and restart the query with
+ // no tag if needed.
+ if (CacheInfo->AATags != Loc.AATags) {
+ if (CacheInfo->AATags) {
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->AATags = AAMDNodes();
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ }
+ if (Loc.AATags)
+ return getNonLocalPointerDepFromBB(QueryInst,
+ Pointer, Loc.getWithoutAATags(),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+ }
+
+ NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
+
+ // If we have valid cached information for exactly the block we are
+ // investigating, just return it with no recomputation.
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+    // We have a fully cached result for this query, so we can just return
+    // the cached results and populate the visited set. However, we have to
+    // verify that we don't already have conflicting results for these
+    // blocks: check that if a block in the results set is also in the
+    // visited set, it was visited for the same pointer query.
+ if (!Visited.empty()) {
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+ if (VI == Visited.end() || VI->second == Pointer.getAddr())
+ continue;
+
+ // We have a pointer mismatch in a block. Just return clobber, saying
+ // that something was clobbered in this result. We could also do a
+ // non-fully cached query, but there is little point in doing this.
+ return true;
+ }
+ }
+
+ Value *Addr = Pointer.getAddr();
+ for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+ I != E; ++I) {
+ Visited.insert(std::make_pair(I->getBB(), Addr));
+ if (I->getResult().isNonLocal()) {
+ continue;
+ }
+
+ if (!DT) {
+ Result.push_back(NonLocalDepResult(I->getBB(),
+ MemDepResult::getUnknown(),
+ Addr));
+ } else if (DT->isReachableFromEntry(I->getBB())) {
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+ }
+ }
+ ++NumCacheCompleteNonLocalPtr;
+ return false;
+ }
+
+  // Otherwise, this is either a new block, a block with an invalid cache
+  // pointer, or one that we're about to invalidate by putting more info into
+  // it than it has valid cached info for. If the cache is empty, the result
+  // we compute will be valid cache info; otherwise it won't be.
+ if (Cache->empty())
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(StartBB);
+
+ // PredList used inside loop.
+ SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
+
+ // Keep track of the entries that we know are sorted. Previously cached
+ // entries will all be sorted. The entries we add we only sort on demand (we
+ // don't insert every element into its sorted position). We know that we
+ // won't get any reuse from currently inserted values, because we don't
+ // revisit blocks after we insert info for them.
+ unsigned NumSortedEntries = Cache->size();
+ DEBUG(AssertSorted(*Cache));
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+    // If we process a large number of blocks, the query becomes very
+    // expensive and is likely not worth the effort.
+ if (Result.size() > NumResultsLimit) {
+ Worklist.clear();
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the
+ // cache value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ }
+ // Since we bail out, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ return true;
+ }
+
+ // Skip the first block if we have it.
+ if (!SkipFirstBlock) {
+ // Analyze the dependency of *Pointer in FromBB. See if we already have
+ // been here.
+ assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+ // Get the dependency info for Pointer in BB. If we have cached
+ // information, we will use it, otherwise we compute it.
+ DEBUG(AssertSorted(*Cache, NumSortedEntries));
+ MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
+ Loc, isLoad, BB, Cache,
+ NumSortedEntries);
+
+ // If we got a Def or Clobber, add this to the list of results.
+ if (!Dep.isNonLocal()) {
+ if (!DT) {
+ Result.push_back(NonLocalDepResult(BB,
+ MemDepResult::getUnknown(),
+ Pointer.getAddr()));
+ continue;
+ } else if (DT->isReachableFromEntry(BB)) {
+ Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+ continue;
+ }
+ }
+ }
+
+ // If 'Pointer' is an instruction defined in this block, then we need to do
+ // phi translation to change it into a value live in the predecessor block.
+ // If not, we just add the predecessors to the worklist and scan them with
+ // the same Pointer.
+ if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+ SkipFirstBlock = false;
+ SmallVector<BasicBlock*, 16> NewBlocks;
+ for (BasicBlock *Pred : PredCache.get(BB)) {
+ // Verify that we haven't looked at this block yet.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
+ if (InsertRes.second) {
+          // First time we've looked at this predecessor.
+ NewBlocks.push_back(Pred);
+ continue;
+ }
+
+ // If we have seen this block before, but it was with a different
+ // pointer then we have a phi translation failure and we have to treat
+ // this as a clobber.
+ if (InsertRes.first->second != Pointer.getAddr()) {
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0; i < NewBlocks.size(); i++)
+ Visited.erase(NewBlocks[i]);
+ goto PredTranslationFailure;
+ }
+ }
+ Worklist.append(NewBlocks.begin(), NewBlocks.end());
+ continue;
+ }
+
+    // We do need to do phi translation; if we know ahead of time that we
+    // can't phi translate this value, don't even try.
+ if (!Pointer.IsPotentiallyPHITranslatable())
+ goto PredTranslationFailure;
+
+ // We may have added values to the cache list before this PHI translation.
+ // If so, we haven't done anything to ensure that the cache remains sorted.
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the cache
+ // value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ Cache = nullptr;
+
+ PredList.clear();
+ for (BasicBlock *Pred : PredCache.get(BB)) {
+ PredList.push_back(std::make_pair(Pred, Pointer));
+
+ // Get the PHI translated pointer in this predecessor. This can fail if
+ // not translatable, in which case the getAddr() returns null.
+ PHITransAddr &PredPointer = PredList.back().second;
+ PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false);
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ // Check to see if we have already visited this pred block with another
+ // pointer. If so, we can't do this lookup. This failure can occur
+ // with PHI translation when a critical edge exists and the PHI node in
+ // the successor translates to a pointer value different than the
+ // pointer the block was first analyzed with.
+ std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
+
+ if (!InsertRes.second) {
+ // We found the pred; take it off the list of preds to visit.
+ PredList.pop_back();
+
+ // If the predecessor was visited with PredPtr, then we already did
+ // the analysis and can ignore it.
+ if (InsertRes.first->second == PredPtrVal)
+ continue;
+
+ // Otherwise, the block was previously analyzed with a different
+ // pointer. We can't represent the result of this case, so we just
+ // treat this as a phi translation failure.
+
+ // Make sure to clean up the Visited map before continuing on to
+ // PredTranslationFailure.
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i)
+ Visited.erase(PredList[i].first);
+
+ goto PredTranslationFailure;
+ }
+ }
+
+    // Actually process results here; this needs to be a separate loop to
+    // avoid calling getNonLocalPointerDepFromBB for blocks we don't want to
+    // return any results for. (getNonLocalPointerDepFromBB will modify our
+    // datastructures in ways the code after the PredTranslationFailure label
+    // doesn't expect.)
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
+ BasicBlock *Pred = PredList[i].first;
+ PHITransAddr &PredPointer = PredList[i].second;
+ Value *PredPtrVal = PredPointer.getAddr();
+
+ bool CanTranslate = true;
+ // If PHI translation was unable to find an available pointer in this
+ // predecessor, then we have to assume that the pointer is clobbered in
+ // that predecessor. We can still do PRE of the load, which would insert
+ // a computation of the pointer in this predecessor.
+ if (!PredPtrVal)
+ CanTranslate = false;
+
+ // FIXME: it is entirely possible that PHI translating will end up with
+ // the same value. Consider PHI translating something like:
+ // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
+ // to recurse here, pedantically speaking.
+
+ // If getNonLocalPointerDepFromBB fails here, that means the cached
+ // result conflicted with the Visited list; we have to conservatively
+ // assume it is unknown, but this also does not block PRE of the load.
+ if (!CanTranslate ||
+ getNonLocalPointerDepFromBB(QueryInst, PredPointer,
+ Loc.getWithNewPtr(PredPtrVal),
+ isLoad, Pred,
+ Result, Visited)) {
+ // Add the entry to the Result list.
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
+ Result.push_back(Entry);
+
+        // Since we had a phi translation failure, the cache for CacheKey
+        // won't include all of the entries that we need to immediately
+        // satisfy future queries. Mark this in NonLocalPointerDeps by
+        // setting the BBSkipFirstBlockPair pointer to null. This causes
+        // future reuses of the cached value to redo work, but not to miss
+        // the phi translation failure.
+ NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+ NLPI.Pair = BBSkipFirstBlockPair();
+ continue;
+ }
+ }
+
+ // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+
+ // Since we did phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+    // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ SkipFirstBlock = false;
+ continue;
+
+ PredTranslationFailure:
+ // The following code is "failure"; we can't produce a sane translation
+ // for the given block. It assumes that we haven't modified any of
+ // our datastructures while processing the current block.
+
+ if (!Cache) {
+ // Refresh the CacheInfo/Cache pointer if it got invalidated.
+ CacheInfo = &NonLocalPointerDeps[CacheKey];
+ Cache = &CacheInfo->NonLocalDeps;
+ NumSortedEntries = Cache->size();
+ }
+
+ // Since we failed phi translation, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+
+ // If *nothing* works, mark the pointer as unknown.
+ //
+ // If this is the magic first block, return this as a clobber of the whole
+ // incoming value. Since we can't phi translate to one of the predecessors,
+ // we have to bail out.
+ if (SkipFirstBlock)
+ return true;
+
+ for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+ assert(I != Cache->rend() && "Didn't find current block??");
+ if (I->getBB() != BB)
+ continue;
+
+ assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
+ "Should only be here with transparent block");
+ I->setResult(MemDepResult::getUnknown());
+ Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+ Pointer.getAddr()));
+ break;
+ }
+ }
+
+ // Okay, we're done now. If we added new values to the cache, re-sort it.
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ DEBUG(AssertSorted(*Cache));
+ return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+ CachedNonLocalPointerInfo::iterator It =
+ NonLocalPointerDeps.find(P);
+ if (It == NonLocalPointerDeps.end()) return;
+
+ // Remove all of the entries in the BB->val map. This involves removing
+ // instructions from the reverse map.
+ NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
+
+ for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+ Instruction *Target = PInfo[i].getResult().getInst();
+ if (!Target) continue; // Ignore non-local dep results.
+ assert(Target->getParent() == PInfo[i].getBB());
+
+ // Eliminating the dirty entry from 'Cache', so update the reverse info.
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+ }
+
+ // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+ NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep. This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with ptr. This can make Ptr available
/// in more places than the cached info currently reflects.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+ // If Ptr isn't really a pointer, just ignore it.
+ if (!Ptr->getType()->isPointerTy()) return;
+ // Flush store info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+ // Flush load info for the pointer.
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
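+
+// Hypothetical client sketch: after proving two pointers equal and replacing
+// one with the other, a pass would call
+//   MD->invalidateCachedPointerInfo(Ptr);
+// so that stale cached results for Ptr are recomputed on the next query.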
+
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache.clear();
+}
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+ // Walk through the Non-local dependencies, removing this one as the value
+ // for any cached queries.
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
+ if (NLDI != NonLocalDeps.end()) {
+ NonLocalDepInfo &BlockMap = NLDI->second.first;
+ for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
+ DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
+ NonLocalDeps.erase(NLDI);
+ }
+
+ // If we have a cached local dependence query for this instruction, remove it.
+ //
+ LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
+ if (LocalDepEntry != LocalDeps.end()) {
+ // Remove us from DepInst's reverse set now that the local dep info is gone.
+ if (Instruction *Inst = LocalDepEntry->second.getInst())
+ RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
+
+ // Remove this local dependency info.
+ LocalDeps.erase(LocalDepEntry);
+ }
+
+ // If we have any cached pointer dependencies on this instruction, remove
+ // them. If the instruction has non-pointer type, then it can't be a pointer
+ // base.
+
+ // Remove it from both the load info and the store info. The instruction
+ // can't be in either of these maps if it is non-pointer.
+ if (RemInst->getType()->isPointerTy()) {
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
+ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
+ }
+
+ // Loop over all of the things that depend on the instruction we're removing.
+ //
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
+
+ // If we find RemInst as a clobber or Def in any of the maps for other values,
+ // we need to replace its entry with a dirty version of the instruction after
+ // it. If RemInst is a terminator, we use a null dirty value.
+ //
+ // Using a dirty version of the instruction after RemInst saves having to scan
+ // the entire block to get to this point.
+ MemDepResult NewDirtyVal;
+ if (!RemInst->isTerminator())
+ NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
+
+ ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseLocalDeps.end()) {
+ // RemInst can't be the terminator if it has local stuff depending on it.
+ assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
+ "Nothing can locally depend on a terminator");
+
+ for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
+ assert(InstDependingOnRemInst != RemInst &&
+ "Already removed our local dep info");
+
+ LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
+
+ // Make sure to remember that new things depend on NewDepInst.
+ assert(NewDirtyVal.getInst() && "There is no way something else can have "
+ "a local dep on this if it is a terminator!");
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ InstDependingOnRemInst));
+ }
+
+ ReverseLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating the
+ // 'ReverseDeps' reference.
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
+ if (ReverseDepIt != ReverseNonLocalDeps.end()) {
+ for (Instruction *I : ReverseDepIt->second) {
+ assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
+
+ PerInstNLInfo &INLD = NonLocalDeps[I];
+ // The information is now dirty!
+ INLD.second = true;
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+ DE = INLD.first.end(); DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NextI = NewDirtyVal.getInst())
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
+ }
+ }
+
+ ReverseNonLocalDeps.erase(ReverseDepIt);
+
+ // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
+ while (!ReverseDepsToAdd.empty()) {
+ ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
+ .insert(ReverseDepsToAdd.back().second);
+ ReverseDepsToAdd.pop_back();
+ }
+ }
+
+ // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
+ // value in the NonLocalPointerDeps info.
+ ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
+ ReverseNonLocalPtrDeps.find(RemInst);
+ if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
+ SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
+
+ for (ValueIsLoadPair P : ReversePtrDepIt->second) {
+ assert(P.getPointer() != RemInst &&
+ "Already removed NonLocalPointerDeps info for RemInst");
+
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
+
+ // The cache is not valid for any specific block anymore.
+ NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+
+ // Update any entries for RemInst to use the instruction after it.
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
+ DI != DE; ++DI) {
+ if (DI->getResult().getInst() != RemInst) continue;
+
+ // Convert to a dirty entry for the subsequent instruction.
+ DI->setResult(NewDirtyVal);
+
+ if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
+ ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
+ }
+
+ // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
+ // subsequent value may invalidate the sortedness.
+ std::sort(NLPDI.begin(), NLPDI.end());
+ }
+
+ ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
+
+ while (!ReversePtrDepsToAdd.empty()) {
+ ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
+ .insert(ReversePtrDepsToAdd.back().second);
+ ReversePtrDepsToAdd.pop_back();
+ }
+ }
+
+
+ assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
+ DEBUG(verifyRemoved(RemInst));
+}
+
+/// verifyRemoved - Verify that the specified instruction does not occur
+/// in our internal data structures. This function verifies by asserting in
+/// debug builds.
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+#ifndef NDEBUG
+ for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+ E = LocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ assert(I->second.getInst() != D &&
+ "Inst occurs in data structures");
+ }
+
+ for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
+ E = NonLocalPointerDeps.end(); I != E; ++I) {
+ assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+ const NonLocalDepInfo &Val = I->second.NonLocalDeps;
+ for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+ II != E; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
+ }
+
+ for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+ E = NonLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ const PerInstNLInfo &INLD = I->second;
+ for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+ EE = INLD.first.end(); II != EE; ++II)
+ assert(II->getResult().getInst() != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+ E = ReverseLocalDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+ E = ReverseNonLocalDeps.end();
+ I != E; ++I) {
+ assert(I->first != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
+ }
+
+ for (ReverseNonLocalPtrDepTy::const_iterator
+ I = ReverseNonLocalPtrDeps.begin(),
+ E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+ assert(I->first != D && "Inst occurs in rev NLPD map");
+
+ for (ValueIsLoadPair P : I->second)
+ assert(P != ValueIsLoadPair(D, false) &&
+ P != ValueIsLoadPair(D, true) &&
+ "Inst occurs in ReverseNonLocalPtrDeps map");
+ }
+#endif
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
new file mode 100644
index 0000000..e449126
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
@@ -0,0 +1,174 @@
+//===- MemoryLocation.cpp - Memory location descriptions -------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+using namespace llvm;
+
+MemoryLocation MemoryLocation::get(const LoadInst *LI) {
+ AAMDNodes AATags;
+ LI->getAAMetadata(AATags);
+ const auto &DL = LI->getModule()->getDataLayout();
+
+ return MemoryLocation(LI->getPointerOperand(),
+ DL.getTypeStoreSize(LI->getType()), AATags);
+}
+
+MemoryLocation MemoryLocation::get(const StoreInst *SI) {
+ AAMDNodes AATags;
+ SI->getAAMetadata(AATags);
+ const auto &DL = SI->getModule()->getDataLayout();
+
+ return MemoryLocation(SI->getPointerOperand(),
+ DL.getTypeStoreSize(SI->getValueOperand()->getType()),
+ AATags);
+}
+
+MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
+ AAMDNodes AATags;
+ VI->getAAMetadata(AATags);
+
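+  // The size actually accessed through a va_list is target- and
+  // type-dependent, so no constant size can be attached here.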
+ return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags);
+}
+
+MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
+ AAMDNodes AATags;
+ CXI->getAAMetadata(AATags);
+ const auto &DL = CXI->getModule()->getDataLayout();
+
+ return MemoryLocation(
+ CXI->getPointerOperand(),
+ DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags);
+}
+
+MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
+ AAMDNodes AATags;
+ RMWI->getAAMetadata(AATags);
+ const auto &DL = RMWI->getModule()->getDataLayout();
+
+ return MemoryLocation(RMWI->getPointerOperand(),
+ DL.getTypeStoreSize(RMWI->getValOperand()->getType()),
+ AATags);
+}
+
+MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have AA tags. For memcpy, they apply
+ // to both the source and the destination.
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
+
+ return MemoryLocation(MTI->getRawSource(), Size, AATags);
+}
+
+MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have AA tags. For memcpy, they apply
+ // to both the source and the destination.
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
+
+ return MemoryLocation(MTI->getRawDest(), Size, AATags);
+}
+
+// FIXME: This code is duplicated with BasicAliasAnalysis and should be hoisted
+// to some common utility location.
+static bool isMemsetPattern16(const Function *MS,
+ const TargetLibraryInfo &TLI) {
+ if (TLI.has(LibFunc::memset_pattern16) &&
+ MS->getName() == "memset_pattern16") {
+ FunctionType *MemsetType = MS->getFunctionType();
+ if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+ isa<PointerType>(MemsetType->getParamType(0)) &&
+ isa<PointerType>(MemsetType->getParamType(1)) &&
+ isa<IntegerType>(MemsetType->getParamType(2)))
+ return true;
+ }
+
+ return false;
+}
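+
+// For reference, the libc routine matched above has (on Darwin) the shape
+//   void memset_pattern16(void *b, const void *pattern16, size_t len);
+// i.e. exactly the pointer/pointer/integer prototype checked for.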
+
+MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
+ unsigned ArgIdx,
+ const TargetLibraryInfo &TLI) {
+ AAMDNodes AATags;
+ CS->getAAMetadata(AATags);
+ const Value *Arg = CS.getArgument(ArgIdx);
+
+ // We may be able to produce an exact size for known intrinsics.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ const DataLayout &DL = II->getModule()->getDataLayout();
+
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memory intrinsic");
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ break;
+
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ assert(ArgIdx == 1 && "Invalid argument index");
+ return MemoryLocation(
+ Arg, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AATags);
+
+ case Intrinsic::invariant_end:
+ assert(ArgIdx == 2 && "Invalid argument index");
+ return MemoryLocation(
+ Arg, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AATags);
+
+ case Intrinsic::arm_neon_vld1:
+ assert(ArgIdx == 0 && "Invalid argument index");
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ return MemoryLocation(Arg, DL.getTypeStoreSize(II->getType()), AATags);
+
+ case Intrinsic::arm_neon_vst1:
+ assert(ArgIdx == 0 && "Invalid argument index");
+ return MemoryLocation(
+ Arg, DL.getTypeStoreSize(II->getArgOperand(1)->getType()), AATags);
+ }
+ }
+
+ // We can bound the aliasing properties of memset_pattern16 just as we can
+ // for memcpy/memset. This is particularly important because the
+ // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+ // whenever possible.
+ if (CS.getCalledFunction() &&
+ isMemsetPattern16(CS.getCalledFunction(), TLI)) {
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memset_pattern16");
+ if (ArgIdx == 1)
+ return MemoryLocation(Arg, 16, AATags);
+ if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
+ return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ }
+ // FIXME: Handle memset_pattern4 and memset_pattern8 also.
+
+ return MemoryLocation(CS.getArgument(ArgIdx), UnknownSize, AATags);
+}
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 0000000..36c4714
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,126 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints it in a
+// (sufficiently prepared) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
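+//
+// A typical invocation (hypothetical input file name) would be
+//   opt -module-debuginfo -analyze input.ll
+// which writes one line per compile unit, subprogram, global variable, and
+// type that DebugInfoFinder collects.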
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class ModuleDebugInfoPrinter : public ModulePass {
+ DebugInfoFinder Finder;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ void print(raw_ostream &O, const Module *M) const override;
+ };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true)
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+ return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+ Finder.processModule(M);
+ return false;
+}
+
+static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
+ unsigned Line = 0) {
+ if (Filename.empty())
+ return;
+
+ O << " from ";
+ if (!Directory.empty())
+ O << Directory << "/";
+ O << Filename;
+ if (Line)
+ O << ":" << Line;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ // Printing the nodes directly isn't particularly helpful (since they
+ // reference other nodes that won't be printed, particularly for the
+ // filenames), so just print a few useful things.
+ for (DICompileUnit *CU : Finder.compile_units()) {
+ O << "Compile unit: ";
+ if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
+ O << Lang;
+ else
+ O << "unknown-language(" << CU->getSourceLanguage() << ")";
+ printFile(O, CU->getFilename(), CU->getDirectory());
+ O << '\n';
+ }
+
+ for (DISubprogram *S : Finder.subprograms()) {
+ O << "Subprogram: " << S->getName();
+ printFile(O, S->getFilename(), S->getDirectory(), S->getLine());
+ if (!S->getLinkageName().empty())
+ O << " ('" << S->getLinkageName() << "')";
+ O << '\n';
+ }
+
+ for (const DIGlobalVariable *GV : Finder.global_variables()) {
+ O << "Global variable: " << GV->getName();
+ printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
+ if (!GV->getLinkageName().empty())
+ O << " ('" << GV->getLinkageName() << "')";
+ O << '\n';
+ }
+
+ for (const DIType *T : Finder.types()) {
+ O << "Type:";
+ if (!T->getName().empty())
+ O << ' ' << T->getName();
+ printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
+ if (auto *BT = dyn_cast<DIBasicType>(T)) {
+ O << " ";
+ if (const char *Encoding =
+ dwarf::AttributeEncodingString(BT->getEncoding()))
+ O << Encoding;
+ else
+ O << "unknown-encoding(" << BT->getEncoding() << ')';
+ } else {
+ O << ' ';
+ if (const char *Tag = dwarf::TagString(T->getTag()))
+ O << Tag;
+ else
+ O << "unknown-tag(" << T->getTag() << ")";
+ }
+ if (auto *CT = dyn_cast<DICompositeType>(T)) {
+ if (auto *S = CT->getRawIdentifier())
+ O << " (identifier: '" << S->getString() << "')";
+ }
+ O << '\n';
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
new file mode 100644
index 0000000..25f660f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -0,0 +1,170 @@
+//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+/// TODO: Theoretically we could check for dependencies between objc_* calls
+/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassSupport.h"
+
+#define DEBUG_TYPE "objc-arc-aa"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ if (!EnableARCOpts)
+ return AAResultBase::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
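+ // (For instance, if LocA.Ptr were the result of an objc_retain call,
+ // GetRCIdentityRoot would return the retain's operand, since retain forwards
+ // its argument verbatim.)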
+ const Value *SA = GetRCIdentityRoot(LocA.Ptr);
+ const Value *SB = GetRCIdentityRoot(LocB.Ptr);
+ AliasResult Result =
+ AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
+ MemoryLocation(SB, LocB.Size, LocB.AATags));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA, DL);
+ const Value *UB = GetUnderlyingObjCPtr(SB, DL);
+ if (UA != SA || UB != SB) {
+ Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return an offsetted pointer value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
+
+bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = GetRCIdentityRoot(Loc.Ptr);
+ if (AAResultBase::pointsToConstantMemory(
+ MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S, DL);
+ if (U != S)
+ return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AAResultBase::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case ARCInstKind::NoopCast:
+ return FMRB_DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AAResultBase::getModRefBehavior(F);
+}
+
+ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ if (!EnableARCOpts)
+ return AAResultBase::getModRefInfo(CS, Loc);
+
+ switch (GetBasicARCInstKind(CS.getInstruction())) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
+ return MRI_NoModRef;
+ default:
+ break;
+ }
+
+ return AAResultBase::getModRefInfo(CS, Loc);
+}
+
+ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return ObjCARCAAResult(F.getParent()->getDataLayout(),
+ AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char ObjCARCAA::PassID;
+
+char ObjCARCAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true)
+
+ImmutablePass *llvm::createObjCARCAAWrapperPass() {
+ return new ObjCARCAAWrapperPass();
+}
+
+ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) {
+ initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ObjCARCAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new ObjCARCAAResult(
+ M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool ObjCARCAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
new file mode 100644
index 0000000..e3e74aa
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -0,0 +1,28 @@
+//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMObjCARCOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+ cl::desc("enable/disable all ARC Optimizations"),
+ cl::location(EnableARCOpts),
+ cl::init(true));
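+
+// Passing -enable-objc-arc-opts=false (for example, to opt) therefore makes
+// ObjCARCAAResult and the other ARC-aware analyses defer to their conservative
+// base implementations.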
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
new file mode 100644
index 0000000..133b635
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -0,0 +1,675 @@
+//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations which are IMHO too big to be in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+ const ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ return OS << "ARCInstKind::Retain";
+ case ARCInstKind::RetainRV:
+ return OS << "ARCInstKind::RetainRV";
+ case ARCInstKind::RetainBlock:
+ return OS << "ARCInstKind::RetainBlock";
+ case ARCInstKind::Release:
+ return OS << "ARCInstKind::Release";
+ case ARCInstKind::Autorelease:
+ return OS << "ARCInstKind::Autorelease";
+ case ARCInstKind::AutoreleaseRV:
+ return OS << "ARCInstKind::AutoreleaseRV";
+ case ARCInstKind::AutoreleasepoolPush:
+ return OS << "ARCInstKind::AutoreleasepoolPush";
+ case ARCInstKind::AutoreleasepoolPop:
+ return OS << "ARCInstKind::AutoreleasepoolPop";
+ case ARCInstKind::NoopCast:
+ return OS << "ARCInstKind::NoopCast";
+ case ARCInstKind::FusedRetainAutorelease:
+ return OS << "ARCInstKind::FusedRetainAutorelease";
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return OS << "ARCInstKind::FusedRetainAutoreleaseRV";
+ case ARCInstKind::LoadWeakRetained:
+ return OS << "ARCInstKind::LoadWeakRetained";
+ case ARCInstKind::StoreWeak:
+ return OS << "ARCInstKind::StoreWeak";
+ case ARCInstKind::InitWeak:
+ return OS << "ARCInstKind::InitWeak";
+ case ARCInstKind::LoadWeak:
+ return OS << "ARCInstKind::LoadWeak";
+ case ARCInstKind::MoveWeak:
+ return OS << "ARCInstKind::MoveWeak";
+ case ARCInstKind::CopyWeak:
+ return OS << "ARCInstKind::CopyWeak";
+ case ARCInstKind::DestroyWeak:
+ return OS << "ARCInstKind::DestroyWeak";
+ case ARCInstKind::StoreStrong:
+ return OS << "ARCInstKind::StoreStrong";
+ case ARCInstKind::CallOrUser:
+ return OS << "ARCInstKind::CallOrUser";
+ case ARCInstKind::Call:
+ return OS << "ARCInstKind::Call";
+ case ARCInstKind::User:
+ return OS << "ARCInstKind::User";
+ case ARCInstKind::IntrinsicUser:
+ return OS << "ARCInstKind::IntrinsicUser";
+ case ARCInstKind::None:
+ return OS << "ARCInstKind::None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+}
+
+ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
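+ // The classification is purely syntactic: it keys off the function name and
+ // the pointer shape of its prototype. For example, a declaration such as
+ //   declare i8* @objc_retain(i8*)
+ // maps to ARCInstKind::Retain, while any prototype we do not recognize falls
+ // back to ARCInstKind::CallOrUser.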
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No (mandatory) arguments.
+ if (AI == AE)
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
+ .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
+ .Default(ARCInstKind::CallOrUser);
+
+ // One argument.
+ const Argument *A0 = &*AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease",
+ ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = &*AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_storeWeak", ARCInstKind::StoreWeak)
+ .Case("objc_initWeak", ARCInstKind::InitWeak)
+ .Case("objc_storeStrong", ARCInstKind::StoreStrong)
+ .Default(ARCInstKind::CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_moveWeak", ARCInstKind::MoveWeak)
+ .Case("objc_copyWeak", ARCInstKind::CopyWeak)
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // make the state of the pointers they are attempting to
+ // elucidate incorrect.
+ .Case("llvm.arc.annotation.topdown.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.topdown.bbend",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbend",
+ ARCInstKind::None)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Anything else.
+ return ARCInstKind::CallOrUser;
+}
+
+// A whitelist of intrinsics that we know do not use objc pointers or decrement
+// ref counts.
+static bool isInertIntrinsic(unsigned ID) {
+ // TODO: Make this into a covered switch.
+ switch (ID) {
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::objectsize:
+ case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for:
+ case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda:
+ case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return true;
+ default:
+ return false;
+ }
+}
+
+// A whitelist of intrinsics that may use objc pointers but are known not to
+// decrement ref counts.
+static bool isUseOnlyIntrinsic(unsigned ID) {
+ // We are conservative here: even though these intrinsics are unlikely to
+ // touch reference counts, we whitelist them explicitly for safety.
+ //
+ // TODO: Expand this into a covered switch. There is a lot more here.
+ switch (ID) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine what kind of construct V is.
+ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than a bitcast or GEP with a pointer operand has a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, and PHIs transfer a
+ // pointer to a subsequent use rather than using it themselves, in this
+ // sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // See if we have a function that we know something about.
+ if (const Function *F = CI->getCalledFunction()) {
+ ARCInstKind Class = GetFunctionClass(F);
+ if (Class != ARCInstKind::CallOrUser)
+ return Class;
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (isInertIntrinsic(ID))
+ return ARCInstKind::None;
+ if (isUseOnlyIntrinsic(ID))
+ return ARCInstKind::User;
+ }
+
+ // Otherwise, be conservative.
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ // Otherwise, be conservative.
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ case Instruction::PHI:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::Switch:
+ case Instruction::IndirectBr:
+ case Instruction::Alloca:
+ case Instruction::VAArg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::IntToPtr:
+ case Instruction::FCmp:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return ARCInstKind::User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return ARCInstKind::User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return ARCInstKind::None;
+}
+
+/// \brief Test if the given class is a kind of user.
+bool llvm::objcarc::IsUser(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::User:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::IntrinsicUser:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::Call:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_retain or equivalent.
+bool llvm::objcarc::IsRetain(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ return true;
+ // I believe we treat retain block as not a retain since it can copy its
+ // block.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::RetainBlock:
+ return true;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
+ // ARCInstKind::RetainBlock may be given a stack argument.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
+ /// It is never safe to tail call objc_autorelease: tail calling it enables
+ /// fast autoreleasing, which can cause our object to be reclaimed from the
+ /// autorelease pool early, violating the semantics of __autoreleasing types
+ /// in ARC.
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+///
+/// This means that it *could* interrupt the RV optimization.
+bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
+ switch (Kind) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+
+ // The cases below are conservative.
+
+ // RetainBlock can result in user defined copy constructors being called
+ // implying releases may occur.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ return true;
+ }
+
+ llvm_unreachable("covered switch isn't covered?");
+}
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
new file mode 100644
index 0000000..0f0016f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -0,0 +1,85 @@
+//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the OrderedBasicBlock class. OrderedBasicBlock
+// maintains an interface where clients can query if one instruction comes
+// before another in a BasicBlock. Since BasicBlock currently lacks a reliable
+// way to query the relative position of two instructions, one can use
+// OrderedBasicBlock for such queries. OrderedBasicBlock is lazily built on a
+// source BasicBlock and maintains an internal Instruction -> Position map. An
+// OrderedBasicBlock instance should be discarded whenever the source
+// BasicBlock changes.
+//
+// It's currently used by the CaptureTracker in order to find relative
+// positions of a pair of instructions inside a BasicBlock.
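+//
+// For example (hypothetical names), if a block defines %a, %b and %c in that
+// order, the first dominates(%a, %c) query walks and numbers instructions
+// until it reaches one of the two queried instructions; later queries are
+// answered from the cached positions without rescanning the block.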
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/Instruction.h"
+using namespace llvm;
+
+OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB)
+ : NextInstPos(0), BB(BasicB) {
+ LastInstFound = BB->end();
+}
+
+/// \brief Given no cached results, find if \p A comes before \p B in \p BB.
+/// Cache and number the instructions while walking \p BB.
+bool OrderedBasicBlock::comesBefore(const Instruction *A,
+ const Instruction *B) {
+ const Instruction *Inst = nullptr;
+ assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
+ "Instruction supposed to be in NumberedInsts");
+
+ // Start the search with the instruction found in the last lookup round.
+ auto II = BB->begin();
+ auto IE = BB->end();
+ if (LastInstFound != IE)
+ II = std::next(LastInstFound);
+
+ // Number all instructions up to the point where we find 'A' or 'B'.
+ for (; II != IE; ++II) {
+ Inst = cast<Instruction>(II);
+ NumberedInsts[Inst] = NextInstPos++;
+ if (Inst == A || Inst == B)
+ break;
+ }
+
+ assert(II != IE && "Instruction not found?");
+ assert((Inst == A || Inst == B) && "Should find A or B");
+ LastInstFound = II;
+ return Inst == A;
+}
+
+/// \brief Find out whether \p A dominates \p B, meaning whether \p A
+/// comes before \p B in \p BB. This is a simplification that considers
+/// cached instruction positions and ignores other basic blocks, so it is only
+/// relevant for comparing relative instruction positions inside \p BB.
+bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
+ assert(A->getParent() == B->getParent() &&
+ "Instructions must be in the same basic block!");
+
+ // First we lookup the instructions. If they don't exist, lookup will give us
+ // back ::end(). If they both exist, we compare the numbers. Otherwise, if NA
+ // exists and NB doesn't, it means NA must come before NB because we would
+ // have numbered NB as well if it didn't. The same is true for NB. If it
+ // exists, but NA does not, NA must come after it. If neither exist, we need
+ // to number the block and cache the results (by calling comesBefore).
+ auto NAI = NumberedInsts.find(A);
+ auto NBI = NumberedInsts.find(B);
+ if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end())
+ return NAI->second < NBI->second;
+ if (NAI != NumberedInsts.end())
+ return true;
+ if (NBI != NumberedInsts.end())
+ return false;
+
+ return comesBefore(A, B);
+}
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 0000000..f7545ea
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,442 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) ||
+ isa<GetElementPtrInst>(Inst))
+ return true;
+
+ if (isa<CastInst>(Inst) &&
+ isSafeToSpeculativelyExecute(Inst))
+ return true;
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1)))
+ return true;
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ return false;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void PHITransAddr::dump() const {
+ if (!Addr) {
+ dbgs() << "PHITransAddr: null\n";
+ return;
+ }
+ dbgs() << "PHITransAddr: " << *Addr << "\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+#endif
+
+
+static bool VerifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ // If this is a non-instruction value, there is nothing to do.
+ Instruction *I = dyn_cast<Instruction>(Expr);
+ if (!I) return true;
+
+ // If it's an instruction, it is either in Tmp or its operands recursively
+ // are.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return true;
+ }
+
+ // If it isn't in the InstInputs list it is a subexpr incorporated into the
+ // address. Sanity check that it is phi translatable.
+ if (!CanPHITrans(I)) {
+ errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
+ errs() << *I << '\n';
+ llvm_unreachable("Either something is missing from InstInputs or "
+ "CanPHITrans is wrong.");
+ }
+
+ // Validate the operands of the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!VerifySubExpr(I->getOperand(i), InstInputs))
+ return false;
+
+ return true;
+}
+
+/// Verify - Check internal consistency of this data structure. If the
+/// structure is valid, it returns true. If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+ if (!Addr) return true;
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
+ if (!VerifySubExpr(Addr, Tmp))
+ return false;
+
+ if (!Tmp.empty()) {
+ errs() << "PHITransAddr contains extra instructions:\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ llvm_unreachable("This is unexpected.");
+ }
+
+ // a-ok.
+ return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it. This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(Addr);
+ return !Inst || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // If the instruction is in the InstInputs list, remove it.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return;
+ }
+
+ assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+ // Otherwise, it must have instruction inputs itself. Zap them recursively.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+ RemoveInstInputs(Op, InstInputs);
+ }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
+ // If this is a non-instruction value, it can't require PHI translation.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst) return V;
+
+ // Determine whether 'Inst' is an input to our PHI translatable expression.
+ bool isInput =
+ std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end();
+
+ // Handle input instructions if needed.
+ if (isInput) {
+ if (Inst->getParent() != CurBB) {
+ // If it is an input defined in a different block, then it remains an
+ // input.
+ return Inst;
+ }
+
+ // If 'Inst' is defined in this block and is an input that needs to be phi
+ // translated, we need to incorporate the value into the expression or fail.
+
+ // In either case, the instruction itself isn't an input any longer.
+ InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+ // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+ // If this is a non-phi value, and it is analyzable, we can incorporate it
+ // into the expression by making all instruction operands be inputs.
+ if (!CanPHITrans(Inst))
+ return nullptr;
+
+ // All instruction operands are now inputs (and of course, they may also be
+ // defined in this block, so they may need to be phi translated themselves).
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+ InstInputs.push_back(Op);
+ }
+
+ // Ok, it must be an intermediate result (either because it started that way
+ // or because we just incorporated it into the expression). See if its
+ // operands need to be phi translated, and if so, reconstruct it.
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
+ Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+ if (!PHIIn) return nullptr;
+ if (PHIIn == Cast->getOperand(0))
+ return Cast;
+
+ // Find an available version of this cast.
+
+ // Constants are trivial to find.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+ C, Cast->getType()));
+
+ // Otherwise we have to see if a casted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (User *U : PHIIn->users()) {
+ if (CastInst *CastI = dyn_cast<CastInst>(U))
+ if (CastI->getOpcode() == Cast->getOpcode() &&
+ CastI->getType() == Cast->getType() &&
+ (!DT || DT->dominates(CastI->getParent(), PredBB)))
+ return CastI;
+ }
+ return nullptr;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ bool AnyChanged = false;
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
+ if (!GEPOp) return nullptr;
+
+ AnyChanged |= GEPOp != GEP->getOperand(i);
+ GEPOps.push_back(GEPOp);
+ }
+
+ if (!AnyChanged)
+ return GEP;
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AC)) {
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ RemoveInstInputs(GEPOps[i], InstInputs);
+
+ return AddAsInput(V);
+ }
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (User *U : APHIOp->users()) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
+ if (std::equal(GEPOps.begin(), GEPOps.end(), GEPI->op_begin()))
+ return GEPI;
+ }
+ }
+ return nullptr;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
+ if (!LHS) return nullptr;
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+
+ // If the old 'LHS' was an input, add the new 'LHS' as an input.
+ if (std::find(InstInputs.begin(), InstInputs.end(), BOp) !=
+ InstInputs.end()) {
+ RemoveInstInputs(BOp, InstInputs);
+ AddAsInput(LHS);
+ }
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) {
+ // If we simplified the operands, the LHS is no longer an input, but Res
+ // is.
+ RemoveInstInputs(LHS, InstInputs);
+ return AddAsInput(Res);
+ }
+
+ // If we didn't modify the add, just return it.
+ if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+ return Inst;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (User *U : LHS->users()) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U))
+ if (BO->getOpcode() == Instruction::Add &&
+ BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
+ return BO;
+ }
+
+ return nullptr;
+ }
+
+ // Otherwise, we failed.
+ return nullptr;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to Pred, updating our state to reflect any needed changes. If
+/// 'MustDominate' is true, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
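+///
+/// A minimal usage sketch (hypothetical caller state, similar to how
+/// MemoryDependenceAnalysis drives the translation):
+///   PHITransAddr Address(Ptr, DL, AC);
+///   if (!Address.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/false))
+///     Ptr = Address.getAddr();  // success: the pointer as seen in PredBB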
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT,
+ bool MustDominate) {
+ assert(DT || !MustDominate);
+ assert(Verify() && "Invalid PHITransAddr!");
+ if (DT && DT->isReachableFromEntry(PredBB))
+ Addr =
+ PHITranslateSubExpr(Addr, CurBB, PredBB, MustDominate ? DT : nullptr);
+ else
+ Addr = nullptr;
+ assert(Verify() && "Invalid PHITransAddr!");
+
+ if (MustDominate)
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = nullptr;
+
+ return Addr == nullptr;
+}
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list. This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ unsigned NISize = NewInsts.size();
+
+ // Attempt to PHI translate with insertion.
+ Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+ // If successful, return the new value.
+ if (Addr) return Addr;
+
+ // If not, destroy any intermediate instructions inserted.
+ while (NewInsts.size() != NISize)
+ NewInsts.pop_back_val()->eraseFromParent();
+ return nullptr;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new instance of it.
+ PHITransAddr Tmp(InVal, DL, AC);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true))
+ return Tmp.getAddr();
+
+ // We don't need to PHI translate values which aren't instructions.
+ auto *Inst = dyn_cast<Instruction>(InVal);
+ if (!Inst)
+ return nullptr;
+
+ // Handle cast of PHI translatable value.
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
+ Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (!OpVal) return nullptr;
+
+ // Otherwise insert a cast at the end of PredBB.
+ CastInst *New = CastInst::Create(Cast->getOpcode(), OpVal, InVal->getType(),
+ InVal->getName() + ".phi.trans.insert",
+ PredBB->getTerminator());
+ New->setDebugLoc(Inst->getDebugLoc());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+ CurBB, PredBB, DT, NewInsts);
+ if (!OpVal) return nullptr;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1),
+ InVal->getName() + ".phi.trans.insert", PredBB->getTerminator());
+ Result->setDebugLoc(Inst->getDebugLoc());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
new file mode 100644
index 0000000..6d92909
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,50 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "postdomtree"
+
+//===----------------------------------------------------------------------===//
+// PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true)
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+PostDominatorTree::~PostDominatorTree() {
+ delete DT;
+}
+
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+
+FunctionPass* llvm::createPostDomTree() {
+ return new PostDominatorTree();
+}
+
diff --git a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
new file mode 100644
index 0000000..68c7535
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -0,0 +1,35 @@
+//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implementation of the pointer use visitors.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PtrUseVisitor.h"
+
+using namespace llvm;
+
+void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
+ for (Use &U : I.uses()) {
+ if (VisitedUses.insert(&U).second) {
+ UseToVisit NewU = {
+ UseToVisit::UseAndIsOffsetKnownPair(&U, IsOffsetKnown),
+ Offset
+ };
+ Worklist.push_back(std::move(NewU));
+ }
+ }
+}
+
+bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
+ if (!IsOffsetKnown)
+ return false;
+
+ return GEPI.accumulateConstantOffset(DL, Offset);
+}
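+
+// For illustration: with IsOffsetKnown set, visiting
+//   %p2 = getelementptr inbounds i32, i32* %p, i64 3
+// advances Offset by 12 bytes (3 * 4-byte i32, per the module's DataLayout),
+// whereas a GEP with a non-constant index makes accumulateConstantOffset
+// return false and the offset is treated as unknown from that point on.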
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 0000000..f59d267
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,182 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <iterator>
+#include <set>
+#ifndef NDEBUG
+#include "llvm/Analysis/RegionPrinter.h"
+#endif
+
+using namespace llvm;
+
+#define DEBUG_TYPE "region"
+
+namespace llvm {
+template class RegionBase<RegionTraits<Function>>;
+template class RegionNodeBase<RegionTraits<Function>>;
+template class RegionInfoBase<RegionTraits<Function>>;
+}
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+// Always verify if expensive checking is enabled.
+
+static cl::opt<bool,true>
+VerifyRegionInfoX(
+ "verify-region-info",
+ cl::location(RegionInfoBase<RegionTraits<Function>>::VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+
+static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
+ cl::location(RegionInfo::printStyle),
+ cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(Region::PrintNone, "none", "print no details"),
+ clEnumValN(Region::PrintBB, "bb",
+ "print regions in detail with block_iterator"),
+ clEnumValN(Region::PrintRN, "rn",
+ "print regions in detail with element_iterator"),
+ clEnumValEnd));
+
+
+//===----------------------------------------------------------------------===//
+// Region implementation
+//
+
+Region::Region(BasicBlock *Entry, BasicBlock *Exit,
+ RegionInfo* RI,
+ DominatorTree *DT, Region *Parent) :
+ RegionBase<RegionTraits<Function>>(Entry, Exit, RI, DT, Parent) {
+
+}
+
+Region::~Region() { }
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+RegionInfo::RegionInfo() :
+ RegionInfoBase<RegionTraits<Function>>() {
+
+}
+
+RegionInfo::~RegionInfo() {
+
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple())
+ ++numSimpleRegions;
+}
+
+void RegionInfo::recalculate(Function &F, DominatorTree *DT_,
+ PostDominatorTree *PDT_, DominanceFrontier *DF_) {
+ DT = DT_;
+ PDT = PDT_;
+ DF = DF_;
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), nullptr,
+ this, DT, nullptr);
+ updateStatistics(TopLevelRegion);
+ calculate(F);
+}
+
+#ifndef NDEBUG
+void RegionInfo::view() { viewRegion(this); }
+
+void RegionInfo::viewOnly() { viewRegionOnly(this); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// RegionInfoPass implementation
+//
+
+RegionInfoPass::RegionInfoPass() : FunctionPass(ID) {
+ initializeRegionInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+RegionInfoPass::~RegionInfoPass() {
+
+}
+
+bool RegionInfoPass::runOnFunction(Function &F) {
+ releaseMemory();
+
+ auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto PDT = &getAnalysis<PostDominatorTree>();
+ auto DF = &getAnalysis<DominanceFrontier>();
+
+ RI.recalculate(F, DT, PDT, DF);
+ return false;
+}
+
+void RegionInfoPass::releaseMemory() {
+ RI.releaseMemory();
+}
+
+void RegionInfoPass::verifyAnalysis() const {
+ RI.verifyAnalysis();
+}
+
+void RegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfoPass::print(raw_ostream &OS, const Module *) const {
+ RI.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void RegionInfoPass::dump() const {
+ RI.dump();
+}
+#endif
+
+char RegionInfoPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file so that they can be
+// referenced from "include/llvm/LinkAllPasses.h". Otherwise the pass would
+// be deleted by link-time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfoPass();
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
new file mode 100644
index 0000000..5e1cdd4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,282 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// Most of this code has been copied from LoopPass.cpp.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "regionpassmgr"
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager()
+ : FunctionPass(ID), PMDataManager() {
+ skipThisRegion = false;
+ redoThisRegion = false;
+ RI = nullptr;
+ CurrentRegion = nullptr;
+}
+
+// Recursively add the region R and all of its subregions into RQ.
+static void addRegionIntoQueue(Region &R, std::deque<Region *> &RQ) {
+ RQ.push_back(&R);
+ for (const auto &E : R)
+ addRegionIntoQueue(*E, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<RegionInfoPass>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+ RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ addRegionIntoQueue(*RI->getTopLevelRegion(), RQ);
+
+ if (RQ.empty()) // No regions, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+ I != E; ++I) {
+ Region *R = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *RP = (RegionPass *)getContainedPass(Index);
+ Changed |= RP->doInitialization(R, *this);
+ }
+ }
+
+ // Walk Regions
+ while (!RQ.empty()) {
+
+ CurrentRegion = RQ.back();
+ skipThisRegion = false;
+ redoThisRegion = false;
+
+ // Run all passes on the current Region.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+ if (isPassDebuggingExecutionsOrMore()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+ }
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+ TimeRegion PassTimer(getPassTimer(P));
+ Changed |= P->runOnRegion(CurrentRegion, *this);
+ }
+
+ if (isPassDebuggingExecutionsOrMore()) {
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+ }
+
+ if (!skipThisRegion) {
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ?
+ "<deleted>" : CurrentRegion->getNameStr(),
+ ON_REGION_MSG);
+
+ if (skipThisRegion)
+ // Do not run other passes on this region.
+ break;
+ }
+
+ // If the region was deleted, release all the region passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisRegion)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_REGION_MSG);
+ }
+
+ // Pop the region from queue after running all passes.
+ RQ.pop_back();
+
+ if (redoThisRegion)
+ RQ.push_back(CurrentRegion);
+
+ // Free all region nodes created in region passes.
+ RI->clearNodeCache();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+  // Print the region tree after all passes have run.
+ DEBUG(
+ dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump();
+ dbgs() << "\n";
+ );
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Region Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintRegionPass(const std::string &B, raw_ostream &o)
+ : RegionPass(ID), Banner(B), Out(o) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ bool runOnRegion(Region *R, RGPassManager &RGM) override {
+ Out << Banner;
+ for (const auto *BB : R->blocks()) {
+ if (BB)
+ BB->print(Out);
+ else
+ Out << "Printing <null> Block";
+ }
+
+ return false;
+ }
+};
+
+char PrintRegionPass::ID = 0;
+} //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for an RGPassManager if P
+// does not preserve higher-level analysis info used by other
+// RGPassManager passes. In that case, pop the RGPassManager off the
+// stack. This will force assignPassManager() to create a new
+// RGPassManager as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+
+  // If this pass destroys high-level information that is used by
+  // other passes managed by the RGPM, then do not insert this pass
+  // into the current RGPM. Use a new RGPassManager instead.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+ RGPassManager *RGPM;
+
+ // Create new Region Pass Manager if it does not exist.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+ RGPM = (RGPassManager*)PMS.top();
+ else {
+
+ assert (!PMS.empty() && "Unable to create Region Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Region Pass Manager
+ RGPM = new RGPassManager();
+ RGPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(RGPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ TPM->schedulePass(RGPM);
+
+ // [4] Push new manager into PMS
+ PMS.push(RGPM);
+ }
+
+ RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintRegionPass(Banner, O);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 0000000..acb218d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,267 @@
+//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include "llvm/IR/LegacyPassManager.h"
+#endif
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegions - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template <>
+struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
+
+ DOTGraphTraits (bool isSimple = false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(const RegionInfo *) { return "Region Graph"; }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode *>::getNodeLabel(
+ Node, reinterpret_cast<RegionNode *>(G->getTopLevelRegion()));
+ }
+
+ std::string getEdgeAttributes(RegionNode *srcNode,
+ GraphTraits<RegionInfo *>::ChildIteratorType CI,
+ RegionInfo *G) {
+ RegionNode *destNode = *CI;
+
+ if (srcNode->isSubRegion() || destNode->isSubRegion())
+ return "";
+
+ // In case of a backedge, do not use it to define the layout of the nodes.
+ BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+ BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+ Region *R = G->getRegionFor(destBB);
+
+ while (R && R->getParent())
+ if (R->getParent()->getEntry() == destBB)
+ R = R->getParent();
+ else
+ break;
+
+ if (R && R->getEntry() == destBB && R->contains(srcBB))
+ return "constraint=false";
+
+ return "";
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region &R, GraphWriter<RegionInfo *> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R.isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R.getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R.getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI)
+ printRegionCluster(**RI, GW, depth + 1);
+
+ const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo());
+
+ for (auto *BB : R.blocks())
+ if (RI.getRegionFor(BB) == &R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB))
+ << ";\n";
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo *G,
+ GraphWriter<RegionInfo *> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(*G->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
+
+namespace {
+
+struct RegionInfoPassGraphTraits {
+ static RegionInfo *getGraph(RegionInfoPass *RIP) {
+ return &RIP->getRegionInfo();
+ }
+};
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionPrinter()
+ : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionPrinter::ID = 0;
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionOnlyPrinter()
+ : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyPrinter::ID = 0;
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionViewer()
+ : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
+ initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionViewer::ID = 0;
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
+ static char ID;
+ RegionOnlyViewer()
+ : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("regonly", ID) {
+ initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+char RegionOnlyViewer::ID = 0;
+
+} //end anonymous namespace
+
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(
+ RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file (with no function bodies)", true,
+ true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true)
+
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true)
+
+FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); }
+
+FunctionPass *llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+#ifndef NDEBUG
+static void viewRegionInfo(RegionInfo *RI, bool ShortNames) {
+ assert(RI && "Argument must be non-null");
+
+ llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent();
+ std::string GraphName = DOTGraphTraits<RegionInfo *>::getGraphName(RI);
+
+ llvm::ViewGraph(RI, "reg", ShortNames,
+ Twine(GraphName) + " for '" + F->getName() + "' function");
+}
+
+static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) {
+ assert(F && "Argument must be non-null");
+ assert(!F->isDeclaration() && "Function must have an implementation");
+
+ // The viewer and analysis passes do not modify anything, so we can safely
+ // remove the const qualifier
+ auto NonConstF = const_cast<Function *>(F);
+
+ llvm::legacy::FunctionPassManager FPM(NonConstF->getParent());
+ FPM.add(ViewerPass);
+ FPM.doInitialization();
+ FPM.run(*NonConstF);
+ FPM.doFinalization();
+}
+
+void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); }
+
+void llvm::viewRegion(const Function *F) {
+ invokeFunctionPass(F, createRegionViewerPass());
+}
+
+void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); }
+
+void llvm::viewRegionOnly(const Function *F) {
+ invokeFunctionPass(F, createRegionOnlyViewerPass());
+}
+#endif
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
new file mode 100644
index 0000000..34074ef
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -0,0 +1,9741 @@
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution analysis
+// engine, which is used primarily to analyze expressions involving induction
+// variables in loops.
+//
+// There are several aspects to this library. First is the representation of
+// scalar expressions, which are represented as subclasses of the SCEV class.
+// These classes are used to represent certain types of subexpressions that we
+// can handle. We only create one SCEV of a particular shape, so
+// pointer-comparisons for equality are legal.
+//
+// One important aspect of the SCEV objects is that they are never cyclic, even
+// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
+// the PHI node is one of the idioms that we can represent (e.g., a polynomial
+// recurrence) then we represent it directly as a recurrence node, otherwise we
+// represent it as a SCEVUnknown node.
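+//
+// For example, the loop-varying idiom
+//   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+//   %iv.next = add nuw nsw i32 %iv, 4
+// becomes the single addrec SCEV {0,+,4}<nuw><nsw><%loop>; because SCEVs are
+// uniqued, any other expression that folds to the same shape yields the same
+// object, which is what makes the pointer comparisons above legal.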
+//
+// In addition to being able to represent expressions of various types, we also
+// have folders that are used to build the *canonical* representation for a
+// particular expression. These folders are capable of using a variety of
+// rewrite rules to simplify the expressions.
+//
+// Once the folders are defined, we can implement the more interesting
+// higher-level code, such as the code that recognizes PHI nodes of various
+// types, computes the execution count of a loop, etc.
+//
+// TODO: We should use these routines and value representations to implement
+// dependence analysis!
+//
+//===----------------------------------------------------------------------===//
+//
+// There are several good references for the techniques used in this analysis.
+//
+// Chains of recurrences -- a method to expedite the evaluation
+// of closed-form functions
+// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
+//
+// On computational properties of chains of recurrences
+// Eugene V. Zima
+//
+// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
+// Robert A. van Engelen
+//
+// Efficient Symbolic Analysis for Optimizing Compilers
+// Robert A. van Engelen
+//
+// Using the chains of recurrences algebra for data dependence testing and
+// induction variable substitution
+// MS Thesis, Johnie Birch
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "scalar-evolution"
+
+STATISTIC(NumArrayLenItCounts,
+ "Number of trip counts computed with array length");
+STATISTIC(NumTripCountsComputed,
+ "Number of loops with predictable loop counts");
+STATISTIC(NumTripCountsNotComputed,
+ "Number of loops without predictable loop counts");
+STATISTIC(NumBruteForceTripCountsComputed,
+ "Number of loops with trip counts computed by force");
+
+static cl::opt<unsigned>
+MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant "
+ "derived loop"),
+ cl::init(100));
+
+// FIXME: Enable this with XDEBUG when the test suite is clean.
+static cl::opt<bool>
+VerifySCEV("verify-scev",
+ cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+
+//===----------------------------------------------------------------------===//
+// SCEV class definitions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Implementation of the SCEV class.
+//
+
+LLVM_DUMP_METHOD
+void SCEV::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void SCEV::print(raw_ostream &OS) const {
+ switch (static_cast<SCEVTypes>(getSCEVType())) {
+ case scConstant:
+ cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
+ return;
+ case scTruncate: {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+ const SCEV *Op = Trunc->getOperand();
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+ << *Trunc->getType() << ")";
+ return;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+ const SCEV *Op = ZExt->getOperand();
+ OS << "(zext " << *Op->getType() << " " << *Op << " to "
+ << *ZExt->getType() << ")";
+ return;
+ }
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+ const SCEV *Op = SExt->getOperand();
+ OS << "(sext " << *Op->getType() << " " << *Op << " to "
+ << *SExt->getType() << ")";
+ return;
+ }
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+ OS << "{" << *AR->getOperand(0);
+ for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+ OS << ",+," << *AR->getOperand(i);
+ OS << "}<";
+ if (AR->getNoWrapFlags(FlagNUW))
+ OS << "nuw><";
+ if (AR->getNoWrapFlags(FlagNSW))
+ OS << "nsw><";
+ if (AR->getNoWrapFlags(FlagNW) &&
+ !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
+ OS << "nw><";
+ AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ">";
+ return;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+ const char *OpStr = nullptr;
+ switch (NAry->getSCEVType()) {
+ case scAddExpr: OpStr = " + "; break;
+ case scMulExpr: OpStr = " * "; break;
+ case scUMaxExpr: OpStr = " umax "; break;
+ case scSMaxExpr: OpStr = " smax "; break;
+ }
+ OS << "(";
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ OS << **I;
+ if (std::next(I) != E)
+ OS << OpStr;
+ }
+ OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
+ return;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+ OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+ return;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(this);
+ Type *AllocTy;
+ if (U->isSizeOf(AllocTy)) {
+ OS << "sizeof(" << *AllocTy << ")";
+ return;
+ }
+ if (U->isAlignOf(AllocTy)) {
+ OS << "alignof(" << *AllocTy << ")";
+ return;
+ }
+
+ Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo)) {
+ OS << "offsetof(" << *CTy << ", ";
+ FieldNo->printAsOperand(OS, false);
+ OS << ")";
+ return;
+ }
+
+ // Otherwise just print it normally.
+ U->getValue()->printAsOperand(OS, false);
+ return;
+ }
+ case scCouldNotCompute:
+ OS << "***COULDNOTCOMPUTE***";
+ return;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+Type *SCEV::getType() const {
+ switch (static_cast<SCEVTypes>(getSCEVType())) {
+ case scConstant:
+ return cast<SCEVConstant>(this)->getType();
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return cast<SCEVCastExpr>(this)->getType();
+ case scAddRecExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ return cast<SCEVNAryExpr>(this)->getType();
+ case scAddExpr:
+ return cast<SCEVAddExpr>(this)->getType();
+ case scUDivExpr:
+ return cast<SCEVUDivExpr>(this)->getType();
+ case scUnknown:
+ return cast<SCEVUnknown>(this)->getType();
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+bool SCEV::isZero() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isZero();
+ return false;
+}
+
+bool SCEV::isOne() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isOne();
+ return false;
+}
+
+bool SCEV::isAllOnesValue() const {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+ return SC->getValue()->isAllOnesValue();
+ return false;
+}
+
+/// isNonConstantNegative - Return true if the specified SCEV is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getAPInt().isNegative();
+}
+
+SCEVCouldNotCompute::SCEVCouldNotCompute() :
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+
+bool SCEVCouldNotCompute::classof(const SCEV *S) {
+ return S->getSCEVType() == scCouldNotCompute;
+}
+
+const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scConstant);
+ ID.AddPointer(V);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
+ return getConstant(ConstantInt::get(getContext(), Val));
+}
+
+const SCEV *
+ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
+ IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ return getConstant(ConstantInt::get(ITy, V, isSigned));
+}
+
+SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
+ unsigned SCEVTy, const SCEV *op, Type *ty)
+ : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+
+SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, Type *ty)
+ : SCEVCastExpr(ID, scTruncate, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate non-integer value!");
+}
+
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, Type *ty)
+ : SCEVCastExpr(ID, scZeroExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot zero extend non-integer value!");
+}
+
+SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
+ const SCEV *op, Type *ty)
+ : SCEVCastExpr(ID, scSignExtend, op, ty) {
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot sign extend non-integer value!");
+}
+
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(nullptr);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
+bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getNumOperands() == 2)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+ if (CI->isOne()) {
+ AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+ ->getElementType();
+ return true;
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked() &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ AllocTy = STy->getElementType(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
+ if (VCE->getOpcode() == Instruction::PtrToInt)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getNumOperands() == 3 &&
+ CE->getOperand(0)->isNullValue() &&
+ CE->getOperand(1)->isNullValue()) {
+ Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ // Ignore vector types here so that ScalarEvolutionExpander doesn't
+ // emit getelementptrs that index into vectors.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ CTy = Ty;
+ FieldNo = CE->getOperand(2);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
+/// than the complexity of the RHS. This comparator is used to canonicalize
+/// expressions.
+class SCEVComplexityCompare {
+ const LoopInfo *const LI;
+public:
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+
+  // Return true if the complexity of LHS is strictly less than that of RHS.
+ bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
+
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
+
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (static_cast<SCEVTypes>(LType)) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
+ // Order pointer values after integer values. This helps SCEVExpander
+ // form GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
+
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
+ }
+
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
+
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ return 0;
+ }
+
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getAPInt();
+ const APInt &RA = RC->getAPInt();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
+ }
+
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+ return (int)LNumOps - (int)RNumOps;
+ }
+
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
+ }
+
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ }
+};
+} // end anonymous namespace
+
+/// GroupByComplexity - Given a list of SCEV objects, order them by their
+/// complexity, and group objects of the same complexity together by value.
+/// When this routine is finished, we know that any duplicates in the vector are
+/// consecutive and that complexity is monotonically increasing.
+///
+/// Note that we take special precautions to ensure that we get deterministic
+/// results from this routine. In other words, we don't want the results of
+/// this to depend on where the addresses of various SCEV objects happened to
+/// land in memory.
+///
+static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
+ LoopInfo *LI) {
+ if (Ops.size() < 2) return; // Noop
+ if (Ops.size() == 2) {
+ // This is the common case, which also happens to be trivially simple.
+ // Special case it.
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
+ return;
+ }
+
+ // Do the rough sort by complexity.
+ std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+
+ // Now that we are sorted by complexity, group elements of the same
+ // complexity. Note that this is, at worst, N^2, but the vector is likely to
+ // be extremely short in practice. Note that we take this approach because we
+ // do not want to depend on the addresses of the objects we are grouping.
+ for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
+ const SCEV *S = Ops[i];
+ unsigned Complexity = S->getSCEVType();
+
+ // If there are any objects of the same complexity and same value as this
+ // one, group them.
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
+ if (Ops[j] == S) { // Found a duplicate.
+ // Move it to immediately after i'th element.
+ std::swap(Ops[i+1], Ops[j]);
+ ++i; // no need to rescan it.
+ if (i == e-2) return; // Done!
+ }
+ }
+ }
+}
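+
+// For example, given operands {%b, C, %a, %b} with C a constant, the sort
+// moves the constant first and the grouping pass then makes the duplicate
+// %b operands adjacent, e.g. {C, %b, %b, %a}; callers rely on duplicates
+// being consecutive regardless of where the objects happen to live in
+// memory.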
+
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+ struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
+
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
+ }
+ bool isDone() const {
+ return false;
+ }
+ };
+
+ FindSCEVSize F;
+ SCEVTraversal<FindSCEVSize> ST(F);
+ ST.visitAll(S);
+ return F.Size;
+}
+
+namespace {
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
+public:
+ // Computes the Quotient and Remainder of the division of Numerator by
+ // Denominator.
+ static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+ const SCEV *Denominator, const SCEV **Quotient,
+ const SCEV **Remainder) {
+ assert(Numerator && Denominator && "Uninitialized SCEV");
+
+ SCEVDivision D(SE, Numerator, Denominator);
+
+ // Check for the trivial case here to avoid having to check for it in the
+ // rest of the code.
+ if (Numerator == Denominator) {
+ *Quotient = D.One;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ if (Numerator->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = D.Zero;
+ return;
+ }
+
+    // A simple case: N/1. The quotient is N, the remainder is zero.
+ if (Denominator->isOne()) {
+ *Quotient = Numerator;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ // Split the Denominator when it is a product.
+ if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
+ const SCEV *Q, *R;
+ *Quotient = Numerator;
+ for (const SCEV *Op : T->operands()) {
+ divide(SE, *Quotient, Op, &Q, &R);
+ *Quotient = Q;
+
+ // Bail out when the Numerator is not divisible by one of the terms of
+ // the Denominator.
+ if (!R->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = Numerator;
+ return;
+ }
+ }
+ *Remainder = D.Zero;
+ return;
+ }
+
+ D.visit(Numerator);
+ *Quotient = D.Quotient;
+ *Remainder = D.Remainder;
+ }
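+
+  // For example, dividing the affine addrec {0,+,8} by the constant 4 gives
+  // quotient {0,+,2} and remainder 0, while dividing (%n + 3) by %n gives
+  // quotient 1 and remainder 3, since the constant term is not divisible
+  // by %n.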
+
+  // Except in the trivial case described above, we do not know how to divide
+  // Expr by Denominator in the following cases, so their visitors are left
+  // with empty implementations.
+ void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+ void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+ void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+ void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+ void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+ void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitUnknown(const SCEVUnknown *Numerator) {}
+ void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ APInt NumeratorVal = Numerator->getAPInt();
+ APInt DenominatorVal = D->getAPInt();
+ uint32_t NumeratorBW = NumeratorVal.getBitWidth();
+ uint32_t DenominatorBW = DenominatorVal.getBitWidth();
+
+ if (NumeratorBW > DenominatorBW)
+ DenominatorVal = DenominatorVal.sext(NumeratorBW);
+ else if (NumeratorBW < DenominatorBW)
+ NumeratorVal = NumeratorVal.sext(DenominatorBW);
+
+ APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
+ APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
+ APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
+ Quotient = SE.getConstant(QuotientVal);
+ Remainder = SE.getConstant(RemainderVal);
+ return;
+ }
+ }
+
+ void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+ const SCEV *StartQ, *StartR, *StepQ, *StepR;
+ if (!Numerator->isAffine())
+ return cannotDivide(Numerator);
+ divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+ divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ // Bail out if the types do not match.
+ Type *Ty = Denominator->getType();
+ if (Ty != StartQ->getType() || Ty != StartR->getType() ||
+ Ty != StepQ->getType() || Ty != StepR->getType())
+ return cannotDivide(Numerator);
+ Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ }
+
+ void visitAddExpr(const SCEVAddExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs, Rs;
+ Type *Ty = Denominator->getType();
+
+ for (const SCEV *Op : Numerator->operands()) {
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType() || Ty != R->getType())
+ return cannotDivide(Numerator);
+
+ Qs.push_back(Q);
+ Rs.push_back(R);
+ }
+
+ if (Qs.size() == 1) {
+ Quotient = Qs[0];
+ Remainder = Rs[0];
+ return;
+ }
+
+ Quotient = SE.getAddExpr(Qs);
+ Remainder = SE.getAddExpr(Rs);
+ }
+
+ void visitMulExpr(const SCEVMulExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs;
+ Type *Ty = Denominator->getType();
+
+ bool FoundDenominatorTerm = false;
+ for (const SCEV *Op : Numerator->operands()) {
+ // Bail out if types do not match.
+ if (Ty != Op->getType())
+ return cannotDivide(Numerator);
+
+ if (FoundDenominatorTerm) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Check whether Denominator divides one of the product operands.
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+ if (!R->isZero()) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType())
+ return cannotDivide(Numerator);
+
+ FoundDenominatorTerm = true;
+ Qs.push_back(Q);
+ }
+
+ if (FoundDenominatorTerm) {
+ Remainder = Zero;
+ if (Qs.size() == 1)
+ Quotient = Qs[0];
+ else
+ Quotient = SE.getMulExpr(Qs);
+ return;
+ }
+
+ if (!isa<SCEVUnknown>(Denominator))
+ return cannotDivide(Numerator);
+
+ // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+ ValueToValueMap RewriteMap;
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(Zero)->getValue();
+ Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+
+ if (Remainder->isZero()) {
+ // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(One)->getValue();
+ Quotient =
+ SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ return;
+ }
+
+ // Quotient is (Numerator - Remainder) divided by Denominator.
+ const SCEV *Q, *R;
+ const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+ // This SCEV does not seem to simplify: fail the division here.
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
+ return cannotDivide(Numerator);
+ divide(SE, Diff, Denominator, &Q, &R);
+ if (R != Zero)
+ return cannotDivide(Numerator);
+ Quotient = Q;
+ }
+
+private:
+ SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
+ const SCEV *Denominator)
+ : SE(S), Denominator(Denominator) {
+ Zero = SE.getZero(Denominator->getType());
+ One = SE.getOne(Denominator->getType());
+
+ // We generally do not know how to divide Expr by Denominator. We
+ // initialize the division to a "cannot divide" state to simplify the rest
+ // of the code.
+ cannotDivide(Numerator);
+ }
+
+ // Convenience function for giving up on the division. We set the quotient to
+ // be equal to zero and the remainder to be equal to the numerator.
+ void cannotDivide(const SCEV *Numerator) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ }
+
+ ScalarEvolution &SE;
+ const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
+};
+
+}
+
+//===----------------------------------------------------------------------===//
+// Simple SCEV method implementations
+//===----------------------------------------------------------------------===//
+
+/// BinomialCoefficient - Compute BC(It, K). The result has width W.
+/// Assumes K > 0.
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ Type *ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+  // Suppose W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must assure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+  // but this formula needs less than W + K bits. Also, the first formula requires
+ // a division step, whereas this formula only requires multiplies and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
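+  //
+  // As a concrete check of the scheme, take K = 4 and It = 5, so that
+  // BC(5, 4) = 5. Here K! = 24 = 2^3 * 3, giving T = 3 and K!/2^T = 3. The
+  // product 5*4*3*2 = 120 is formed at W+3 bits; dividing by 2^T gives 15,
+  // and multiplying by the inverse of 3 mod 2^W (171 when W = 8, since
+  // 3 * 171 = 513 = 2*256 + 1) gives 15 * 171 mod 256 = 5, as expected.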
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying for calculating K! / 2^T to avoid overflow.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
+ APInt OddFactorial(W, 1);
+ unsigned T = 1;
+ for (unsigned i = 3; i <= K; ++i) {
+ APInt Mult(W, i);
+ unsigned TwoFactors = Mult.countTrailingZeros();
+ T += TwoFactors;
+ Mult = Mult.lshr(TwoFactors);
+ OddFactorial *= Mult;
+ }
+
+ // We need at least W + T bits for the multiplication step
+ unsigned CalculationBits = W + T;
+
+ // Calculate 2^T, at width T+W.
+ APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
+
+ // Calculate the multiplicative inverse of K! / 2^T;
+ // this multiplication factor will perform the exact division by
+ // K! / 2^T.
+ APInt Mod = APInt::getSignedMinValue(W+1);
+ APInt MultiplyFactor = OddFactorial.zext(W+1);
+ MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
+ MultiplyFactor = MultiplyFactor.trunc(W);
+
+ // Calculate the product, at width T+W
+ IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+ CalculationBits);
+ const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
+ for (unsigned i = 1; i != K; ++i) {
+ const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
+ Dividend = SE.getMulExpr(Dividend,
+ SE.getTruncateOrZeroExtend(S, CalculationTy));
+ }
+
+ // Divide by 2^T
+ const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
+
+ // Truncate the result, and divide by K! / 2^T.
+
+ return SE.getMulExpr(SE.getConstant(MultiplyFactor),
+ SE.getTruncateOrZeroExtend(DivResult, ResultTy));
+}
+
+/// evaluateAtIteration - Return the value of this chain of recurrences at
+/// the specified iteration number. We can evaluate this recurrence by
+/// multiplying each element in the chain by the binomial coefficient
+/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as:
+///
+/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
+///
+/// where BC(It, k) stands for the binomial coefficient.
+///
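+/// For example, {0,+,1,+,2} evaluates at iteration n to
+/// 0*BC(n,0) + 1*BC(n,1) + 2*BC(n,2) = n + n*(n-1) = n^2.
+///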
+const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
+ ScalarEvolution &SE) const {
+ const SCEV *Result = getStart();
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ // The computation is correct in the face of overflow provided that the
+ // multiplication is performed _after_ the evaluation of the binomial
+ // coefficient.
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
+ if (isa<SCEVCouldNotCompute>(Coeff))
+ return Coeff;
+
+ Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
+ }
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// SCEV Expression folder implementations
+//===----------------------------------------------------------------------===//
+
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
+ "This is not a truncating conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scTruncate);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
+
+ // trunc(trunc(x)) --> trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
+ return getTruncateExpr(ST->getOperand(), Ty);
+
+ // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getTruncateOrSignExtend(SS->getOperand(), Ty);
+
+ // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+
+ // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+ // eliminate all the truncates, or we replace other casts with truncates.
+ if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+ if (!isa<SCEVCastExpr>(SA->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getAddExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+ // eliminate all the truncates, or we replace other casts with truncates.
+ if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+ if (!isa<SCEVCastExpr>(SM->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getMulExpr(Operands);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // If the input value is a chrec scev, truncate the chrec's operands.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (const SCEV *Op : AddRec->operands())
+ Operands.push_back(getTruncateExpr(Op, Ty));
+ return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ }
+
+ // The cast wasn't folded; create an explicit cast node. We can reuse
+ // the existing insert position since if we get here, we won't have
+ // made any changes which would invalidate it.
+ SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the
+// loop does not exceed this limit before incrementing.
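+//
+// For instance, for an i8 recurrence whose step is known positive with a
+// signed max of 3, this returns SINT_MIN - 3 == 125 with predicate SLT:
+// any value slt 125 can grow by at most 3 without exceeding SINT_MAX.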
+static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return nullptr;
+}
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// unsigned overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
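+//
+// For instance, at i8 with an unsigned step max of 3, this returns
+// 0 - 3 == 253 with predicate ULT: any value ult 253 can grow by at most 3
+// without wrapping past UINT8_MAX.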
+static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ *Pred = ICmpInst::ICMP_ULT;
+
+ return SE->getConstant(APInt::getMinValue(BitWidth) -
+ SE->getUnsignedRange(Step).getUnsignedMax());
+}
+
+namespace {
+
+struct ExtendOpTraitsBase {
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+};
+
+// Used to make code generic over signed and unsigned overflow.
+template <typename ExtendOp> struct ExtendOpTraits {
+ // Members present:
+ //
+ // static const SCEV::NoWrapFlags WrapType;
+ //
+ // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
+ //
+ // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ // ICmpInst::Predicate *Pred,
+ // ScalarEvolution *SE);
+};
+
+template <>
+struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getSignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
+
+template <>
+struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getUnsignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+}
+
+// The recurrence AR has been shown to have no signed/unsigned wrap or
+// something close to it. Typically, if we can prove NSW/NUW for AR, then we
+// can just as easily prove NSW/NUW for its preincrement or postincrement
+// sibling. This allows normalizing a sign/zero extended AddRec as such:
+// {sext/zext(Step + Start),+,Step} => {Step + sext/zext(Start),+,Step}.
+// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent
+// with "sext/zext(PostIncAR)".
+template <typename ExtendOpTy>
+static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA)
+ return nullptr;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (const SCEV *Op : SA->operands())
+ if (Op != Step)
+ DiffOps.push_back(Op);
+
+ if (DiffOps.size() == SA->getNumOperands())
+ return nullptr;
+
+ // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
+ // `Step`:
+
+ // 1. NSW/NUW flags on the step increment.
+ auto PreStartFlags =
+ ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
+ // "S+X does not sign/unsign-overflow".
+ //
+
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
+ !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
+ (SE->*GetExtendExpr)(Step, WideTy));
+ if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+ if (PreAR && AR->getNoWrapFlags(WrapType)) {
+ // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
+ // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
+ // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
+ }
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit =
+ ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
+ return PreStart;
+
+ return nullptr;
+}
+
+// Get the normalized zero or sign extended expression for this AddRec's Start.
+template <typename ExtendOpTy>
+static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+ if (!PreStart)
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
+ (SE->*GetExtendExpr)(PreStart, Ty));
+}
+
+// Try to prove away overflow by looking at "nearby" add recurrences. A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+// {S,+,X} == {S-T,+,X} + T
+// => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+// If ({S-T,+,X} + T) does not overflow ... (1)
+//
+// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+// If {S-T,+,X} does not overflow ... (2)
+//
+// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+// == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+// If (S-T)+T does not overflow ... (3)
+//
+// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+// == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+// Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
+// does not overflow" restricted to the 0th iteration. Therefore we only need
+// to check for (1) and (2).
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
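+// Concrete instance: with S = 1, X = 4 and T = 1, if {0,+,4} is already
+// known <nuw> (this is (2)) and {0,+,4} + 1 provably does not unsigned-wrap,
+// e.g. because {0,+,4} is ult -1 (this is (1)), then
+// zext({1,+,4}) == {zext(1),+,zext(4)}.
+//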
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+ const SCEV *Step,
+ const Loop *L) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+ // We restrict `Start` to a constant to prevent SCEV from spending too much
+ // time here. It is correct (but more expensive) to continue with a
+ // non-constant `Start` and do a general SCEV subtraction to compute
+ // `PreStart` below.
+ //
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+ if (!StartC)
+ return false;
+
+ APInt StartAI = StartC->getAPInt();
+
+ for (unsigned Delta : {-2, -1, 1, 2}) {
+ const SCEV *PreStart = getConstant(StartAI - Delta);
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ ID.AddPointer(PreStart);
+ ID.AddPointer(Step);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ const auto *PreAR =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+
+ // Give up if we don't already have the add recurrence we need because
+ // actually constructing an add recurrence is relatively expensive.
+ if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
+ const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
+ ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+ DeltaS, &Pred, this);
+ if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
+
+ // zext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scZeroExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // zext(trunc(x)) --> zext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all zero bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getUnsignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+ CR.zextOrTrunc(NewBits)))
+ return getTruncateOrZeroExtend(X, Ty);
+ }
+
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can zero extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNUW))
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+ // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no unsigned overflow.
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
+ const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
+ const SCEV *WideMaxBECount =
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getZeroExtendExpr(Step, WideTy)));
+ if (ZAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as signed.
+ // This covers loops that count down.
+ OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getSignExtendExpr(Step, WideTy)));
+ if (ZAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ if (isKnownPositive(Step)) {
+ const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+ getUnsignedRange(Step).getUnsignedMax());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ } else if (isKnownNegative(Step)) {
+ const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
+ getSignedRange(Step).getSignedMin());
+ if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
+ (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
+ isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ }
+ }
+
+ if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ }
+
+ if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
+ if (SA->getNoWrapFlags(SCEV::FlagNUW)) {
+ // If the addition does not unsign overflow then we can, by definition,
+ // commute the zero extension with the addition operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SA->operands())
+ Ops.push_back(getZeroExtendExpr(Op, Ty));
+ return getAddExpr(Ops, SCEV::FlagNUW);
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Fold if the operand is constant.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
+
+ // sext(sext(x)) --> sext(x)
+ if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
+ return getSignExtendExpr(SS->getOperand(), Ty);
+
+ // sext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
+ // Before doing any expensive analysis, check to see if we've already
+ // computed a SCEV for this Op and Ty.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSignExtend);
+ ID.AddPointer(Op);
+ ID.AddPointer(Ty);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+
+ // If the input value is provably positive, build a zext instead.
+ if (isKnownNonNegative(Op))
+ return getZeroExtendExpr(Op, Ty);
+
+ // sext(trunc(x)) --> sext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all sign bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getSignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+ CR.sextOrTrunc(NewBits)))
+ return getTruncateOrSignExtend(X, Ty);
+ }
+
+ // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
+ if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ if (SA->getNumOperands() == 2) {
+ auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+ auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+ if (SMul && SC1) {
+ if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+ const APInt &C1 = SC1->getAPInt();
+ const APInt &C2 = SC2->getAPInt();
+ if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
+ C2.ugt(C1) && C2.isPowerOf2())
+ return getAddExpr(getSignExtendExpr(SC1, Ty),
+ getSignExtendExpr(SMul, Ty));
+ }
+ }
+ }
+
+ // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
+ if (SA->getNoWrapFlags(SCEV::FlagNSW)) {
+ // If the addition does not sign overflow then we can, by definition,
+ // commute the sign extension with the addition operation.
+ SmallVector<const SCEV *, 4> Ops;
+ for (const auto *Op : SA->operands())
+ Ops.push_back(getSignExtendExpr(Op, Ty));
+ return getAddExpr(Ops, SCEV::FlagNSW);
+ }
+ }
+ // If the input value is a chrec scev, and we can prove that the value
+ // did not overflow the old, smaller, value, we can sign extend all of the
+ // operands (often constants). This allows analysis of something like
+ // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+ if (AR->isAffine()) {
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ unsigned BitWidth = getTypeSizeInBits(AR->getType());
+ const Loop *L = AR->getLoop();
+
+ // If we have special knowledge that this addrec won't overflow,
+ // we don't need to do any further analysis.
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
+
+ // Check whether the backedge-taken count is SCEVCouldNotCompute.
+ // Note that this serves two purposes: It filters out loops that are
+ // simply not analyzable, and it covers the case where this code is
+ // being called from within backedge-taken count analysis, such that
+ // attempting to ask for the backedge-taken count would likely result
+ // in infinite recursion. In the latter case, the analysis code will
+ // cope with a conservative value, and it will take care to purge
+ // that value once it has finished.
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
+ // Manually compute the final value for AR, checking for
+ // overflow.
+
+ // Check whether the backedge-taken count can be losslessly cast to
+ // the addrec's type. The count is always unsigned.
+ const SCEV *CastedMaxBECount =
+ getTruncateOrZeroExtend(MaxBECount, Start->getType());
+ const SCEV *RecastedMaxBECount =
+ getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ if (MaxBECount == RecastedMaxBECount) {
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ // Check whether Start+Step*MaxBECount has no signed overflow.
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
+ const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
+ const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
+ const SCEV *WideMaxBECount =
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
+ const SCEV *OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getSignExtendExpr(Step, WideTy)));
+ if (SAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ // Similar to above, only this time treat the step value as unsigned.
+ // This covers loops that count up with an unsigned step.
+ OperandExtendedAdd =
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
+ getZeroExtendExpr(Step, WideTy)));
+ if (SAdd == OperandExtendedAdd) {
+ // If AR wraps around then
+ //
+ // abs(Step) * MaxBECount > unsigned-max(AR->getType())
+ // => SAdd != OperandExtendedAdd
+ //
+ // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
+ // (SAdd == OperandExtendedAdd => AR is NW)
+
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+
+ // Return the expression with the addrec on the outside.
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ }
+
+ // If the backedge is guarded by a comparison with the pre-inc value,
+ // the addrec is safe. Also, if the entry is guarded by a comparison
+ // with the start value and the backedge is guarded by a comparison
+ // with the post-inc value, the addrec is safe.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit =
+ getSignedOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+ isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+ OverflowLimit)))) {
+ // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ }
+ // If Start and Step are constants, check if we can apply this
+ // transformation:
+ // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
+ auto *SC1 = dyn_cast<SCEVConstant>(Start);
+ auto *SC2 = dyn_cast<SCEVConstant>(Step);
+ if (SC1 && SC2) {
+ const APInt &C1 = SC1->getAPInt();
+ const APInt &C2 = SC2->getAPInt();
+ if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
+ C2.isPowerOf2()) {
+ Start = getSignExtendExpr(Start, Ty);
+ const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
+ AR->getNoWrapFlags());
+ return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
+ }
+ }
+
+ if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
+ }
+
+ // The cast wasn't folded; create an explicit cast node.
+ // Recompute the insert position, as it may have been invalidated.
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+/// getAnyExtendExpr - Return a SCEV for the given operand extended with
+/// unspecified bits out to the given type.
+///
+const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
+ Type *Ty) {
+ assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+ "This is not an extending conversion!");
+ assert(isSCEVable(Ty) &&
+ "This is not a conversion to a SCEVable type!");
+ Ty = getEffectiveSCEVType(Ty);
+
+ // Sign-extend negative constants.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ if (SC->getAPInt().isNegative())
+ return getSignExtendExpr(Op, Ty);
+
+ // Peel off a truncate cast.
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
+ const SCEV *NewOp = T->getOperand();
+ if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
+ return getAnyExtendExpr(NewOp, Ty);
+ return getTruncateOrNoop(NewOp, Ty);
+ }
+
+ // Next try a zext cast. If the cast is folded, use it.
+ const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
+ if (!isa<SCEVZeroExtendExpr>(ZExt))
+ return ZExt;
+
+ // Next try a sext cast. If the cast is folded, use it.
+ const SCEV *SExt = getSignExtendExpr(Op, Ty);
+ if (!isa<SCEVSignExtendExpr>(SExt))
+ return SExt;
+
+ // Force the cast to be folded into the operands of an addrec.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Ops;
+ for (const SCEV *Op : AR->operands())
+ Ops.push_back(getAnyExtendExpr(Op, Ty));
+ return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
+ }
+
+ // If the expression is obviously signed, use the sext cast value.
+ if (isa<SCEVSMaxExpr>(Op))
+ return SExt;
+
+ // Absent any other information, use the zext cast value.
+ return ZExt;
+}
+
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, update the given
+/// map. This is a helper function for getAddExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
+ SmallVectorImpl<const SCEV *> &NewOps,
+ APInt &AccumulatedConstant,
+ const SCEV *const *Ops, size_t NumOperands,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands. They are sorted, with constants first.
+ unsigned i = 0;
+ while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ ++i;
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getAPInt();
+ }
+
+ // Next comes everything else. We're especially interested in multiplies
+ // here, but they're in the middle, so just visit the rest with one loop.
+ for (; i != NumOperands; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Add->op_begin(), Add->getNumOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *Key = SE.getMulExpr(MulOps);
+ auto Pair = M.insert(std::make_pair(Key, NewScale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], Scale));
+ if (Pair.second) {
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
+
+// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
+// `Flags' as can't-wrap behavior. Infer a more aggressive set of
+// can't-overflow flags for the operation if possible.
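+//
+// For instance, if the add is known <nsw> and every operand is known
+// non-negative, it is also <nuw>: a non-negative signed sum that does not
+// sign-overflow cannot wrap unsigned either. Likewise, for {C, A} with C
+// constant, <nsw>/<nuw> follow when the signed/unsigned range of A lies
+// within the corresponding no-wrap region of "+ C".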
+static SCEV::NoWrapFlags
+StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
+ const SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ using namespace std::placeholders;
+ typedef OverflowingBinaryOperator OBO;
+
+ bool CanAnalyze =
+ Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
+ (void)CanAnalyze;
+ assert(CanAnalyze && "don't call from other places!");
+
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap =
+ ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ auto IsKnownNonNegative = [&](const SCEV *S) {
+ return SE->isKnownNonNegative(S);
+ };
+
+ if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
+ Flags =
+ ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
+
+ SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
+
+ if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr &&
+ Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) {
+
+ // (A + C) --> (A + C)<nsw> if the addition does not sign overflow
+ // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow
+
+ const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
+ if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
+ auto NSWRegion =
+ ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap);
+ if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+ }
+ if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
+ auto NUWRegion =
+ ConstantRange::makeNoWrapRegion(Instruction::Add, C,
+ OBO::NoUnsignedWrap);
+ if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ }
+ }
+
+ return Flags;
+}
+
+/// getAddExpr - Get a canonical add expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty add!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVAddExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, &LI);
+
+ Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
+ if (Ops.size() == 2) return Ops[0];
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant zero being added, strip it off.
+ if (LHSC->getValue()->isZero()) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Okay, check to see if the same value occurs in the operand list more than
+ // once. If so, merge them together into a multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
+ Type *Ty = Ops[0]->getType();
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+ if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
+ return Mul;
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
+ }
+ if (FoundMatch)
+ return getAddExpr(Ops, Flags);
+
+ // Check for truncates. If all the operands are truncated from the same
+ // type, see if factoring out the truncate would permit the result to be
+ // folded; e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+ // if the contents of the resulting outer trunc fold to something simple.
+ for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+ Type *DstType = Trunc->getType();
+ Type *SrcType = Trunc->getOperand()->getType();
+ SmallVector<const SCEV *, 8> LargeOps;
+ bool Ok = true;
+ // Check all the operands to see if they can be represented in the
+ // source type of the truncate.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeOps.push_back(T->getOperand());
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+ SmallVector<const SCEV *, 8> LargeMulOps;
+ for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+ if (const SCEVTruncateExpr *T =
+ dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+ if (T->getOperand()->getType() != SrcType) {
+ Ok = false;
+ break;
+ }
+ LargeMulOps.push_back(T->getOperand());
+ } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
+ LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok)
+ LargeOps.push_back(getMulExpr(LargeMulOps));
+ } else {
+ Ok = false;
+ break;
+ }
+ }
+ if (Ok) {
+ // Evaluate the expression in the larger type.
+ const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ // If it folds to something simple, use it. Otherwise, don't.
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+ return getTruncateExpr(Fold, DstType);
+ }
+ }
+
+ // Skip past any other cast SCEVs.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+ ++Idx;
+
+ // If there are add operands they would be next.
+ if (Idx < Ops.size()) {
+ bool DeletedAdd = false;
+ while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ // If we have an add, expand the add operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Add->op_begin(), Add->op_end());
+ DeletedAdd = true;
+ }
+
+ // If we deleted at least one add, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedAdd)
+ return getAddExpr(Ops);
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<const SCEV *, APInt> M;
+ SmallVector<const SCEV *, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops.data(), Ops.size(),
+ APInt(BitWidth, 1), *this)) {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+
+ // Some interesting folding opportunity is present, so it's worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+ for (const SCEV *NewOp : NewOps)
+ MulOpLists[M.find(NewOp)->second].push_back(NewOp);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (auto &MulOp : MulOpLists)
+ if (MulOp.first != 0)
+ Ops.push_back(getMulExpr(getConstant(MulOp.first),
+ getAddExpr(MulOp.second)));
+ if (Ops.empty())
+ return getZero(Ty);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
+ // If we are adding something to a multiply expression, make sure the
+ // something is not already an operand of the multiply. If so, merge it into
+ // the multiply.
+ for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+ const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+ for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+ const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
+ for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+ if (MulOpSCEV == Ops[AddOp]) {
+ // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
+ const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ // If the multiply has more than two operands, we must get the
+ // Y*Z term.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul = getMulExpr(MulOps);
+ }
+ const SCEV *One = getOne(Ty);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ if (Ops.size() == 2) return OuterMul;
+ if (AddOp < Idx) {
+ Ops.erase(Ops.begin()+AddOp);
+ Ops.erase(Ops.begin()+Idx-1);
+ } else {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+AddOp-1);
+ }
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+
+ // Check this multiply against other multiplies being added together.
+ for (unsigned OtherMulIdx = Idx+1;
+ OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+ ++OtherMulIdx) {
+ const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+ // If MulOp occurs in OtherMul, we can fold the two multiplies
+ // together.
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+ OMulOp != e; ++OMulOp)
+ if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+ // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+ const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+ if (Mul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+ InnerMul1 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+ if (OtherMul->getNumOperands() != 2) {
+ SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
+ InnerMul2 = getMulExpr(MulOps);
+ }
+ const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ if (Ops.size() == 2) return OuterMul;
+ Ops.erase(Ops.begin()+Idx);
+ Ops.erase(Ops.begin()+OtherMulIdx-1);
+ Ops.push_back(OuterMul);
+ return getAddExpr(Ops);
+ }
+ }
+ }
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this add and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
+ LIOps.push_back(AddRec->getStart());
+
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ AddRecOps[0] = getAddExpr(LIOps);
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer add and the inner addrec are guaranteed to have no overflow.
+ // Always propagate NW.
+ Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, add the folded AddRec by the non-invariant parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getAddExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // added together. If so, we can fold them.
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ // Step size has changed, so we cannot guarantee no self-wraparound.
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+ return getAddExpr(Ops);
+ }
+
+ // Otherwise couldn't fold anything into this recurrence. Move onto the
+ // next one.
+ }
+
+ // Okay, it looks like we really DO need an add expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ SCEVAddExpr *S =
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
+ uint64_t k = i*j;
+ if (j > 1 && k / j != i) Overflow = true;
+ return k;
+}
+
+/// Compute the result of "n choose k", the binomial coefficient. If an
+/// intermediate computation overflows, Overflow will be set and the return will
+/// be garbage. Overflow is not cleared on absence of overflow.
+static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
+ // We use the multiplicative formula:
+ // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
+ // At each iteration, we multiply by the next term of the numerator and
+ // divide by the next term of the denominator. Each such division produces
+ // an integral result, and helps reduce the chance of overflow in the
+ // intermediate computations. However, we can still overflow even when the
+ // final result would fit.
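+ //
+ // For example, Choose(6, 2) computes r = 1*6/1 = 6, then r = 6*5/2 = 15,
+ // matching C(6, 2) = 15.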
+
+ if (n == 0 || n == k) return 1;
+ if (k > n) return 0;
+
+ if (k > n/2)
+ k = n-k;
+
+ uint64_t r = 1;
+ for (uint64_t i = 1; i <= k; ++i) {
+ r = umul_ov(r, n-(i-1), Overflow);
+ r /= i;
+ }
+ return r;
+}
+
+/// Determine if any of the operands in this SCEV are a constant or if
+/// any of the add or multiply expressions in this SCEV contain a constant.
+static bool containsConstantSomewhere(const SCEV *StartExpr) {
+ SmallVector<const SCEV *, 4> Ops;
+ Ops.push_back(StartExpr);
+ while (!Ops.empty()) {
+ const SCEV *CurrentExpr = Ops.pop_back_val();
+ if (isa<SCEVConstant>(*CurrentExpr))
+ return true;
+
+ if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
+ const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
+ Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
+ }
+ }
+ return false;
+}
+
+/// getMulExpr - Get a canonical multiply expression, or something simpler if
+/// possible.
+const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+ "only nuw or nsw allowed");
+ assert(!Ops.empty() && "Cannot get empty mul!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVMulExpr operand types don't match!");
+#endif
+
+ // Sort by complexity, this groups all similar expression types together.
+ GroupByComplexity(Ops, &LI);
+
+ Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+
+ // C1*(C2+V) -> C1*C2 + C1*V
+ if (Ops.size() == 2)
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ // If any of Add's ops are Adds or Muls with a constant,
+ // apply this transformation as well.
+ if (Add->getNumOperands() == 2)
+ if (containsConstantSomewhere(Add))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
+
+ ++Idx;
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold =
+ ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant one being multiplied, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
+ // If we have a multiply of zero, it will always be zero.
+ return Ops[0];
+ } else if (Ops[0]->isAllOnesValue()) {
+ // If we have a mul by -1 of an add, try distributing the -1 among the
+ // add operands.
+ if (Ops.size() == 2) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+ SmallVector<const SCEV *, 4> NewOps;
+ bool AnyFolded = false;
+ for (const SCEV *AddOp : Add->operands()) {
+ const SCEV *Mul = getMulExpr(Ops[0], AddOp);
+ if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+ NewOps.push_back(Mul);
+ }
+ if (AnyFolded)
+ return getAddExpr(NewOps);
+ } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ // Negation preserves a recurrence's no self-wrap property.
+ SmallVector<const SCEV *, 4> Operands;
+ for (const SCEV *AddRecOp : AddRec->operands())
+ Operands.push_back(getMulExpr(Ops[0], AddRecOp));
+
+ return getAddRecExpr(Operands, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ }
+ }
+ }
+
+ if (Ops.size() == 1)
+ return Ops[0];
+ }
+
+ // Skip over the add expression until we get to a multiply.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+ ++Idx;
+
+ // If there are mul operands inline them all into this expression.
+ if (Idx < Ops.size()) {
+ bool DeletedMul = false;
+ while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ // If we have a mul, expand the mul operands onto the end of the operands
+ // list.
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(Mul->op_begin(), Mul->op_end());
+ DeletedMul = true;
+ }
+
+ // If we deleted at least one mul, we added operands to the end of the list,
+ // and they are not necessarily sorted. Recurse to resort and resimplify
+ // any operands we just acquired.
+ if (DeletedMul)
+ return getMulExpr(Ops);
+ }
+
+ // If there are any add recurrences in the operands list, see if any other
+ // added values are loop invariant. If so, we can fold them into the
+ // recurrence.
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+ ++Idx;
+
+ // Scan over all recurrences, trying to fold loop invariants into them.
+ for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+ // Scan all of the other operands to this mul and add them to the vector if
+ // they are loop invariant w.r.t. the recurrence.
+ SmallVector<const SCEV *, 8> LIOps;
+ const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ LIOps.push_back(Ops[i]);
+ Ops.erase(Ops.begin()+i);
+ --i; --e;
+ }
+
+ // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) {
+ // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.reserve(AddRec->getNumOperands());
+ const SCEV *Scale = getMulExpr(LIOps);
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer mul and the inner addrec are guaranteed to have no overflow.
+ //
+ // The no-self-wrap property cannot be guaranteed after changing the step
+ // size, but it will be inferred if either NUW or NSW is true.
+ Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
+
+ // If all of the other operands were loop invariant, we are done.
+ if (Ops.size() == 1) return NewRec;
+
+ // Otherwise, multiply the folded AddRec by the non-invariant parts.
+ for (unsigned i = 0;; ++i)
+ if (Ops[i] == AddRec) {
+ Ops[i] = NewRec;
+ break;
+ }
+ return getMulExpr(Ops);
+ }
+
+ // Okay, if there weren't any loop invariants to be folded, check to see if
+ // there are multiple AddRec's with the same loop induction variable being
+ // multiplied together. If so, we can fold them.
+
+ // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+ // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+ // ]]],+,...up to x=2n}.
+ // Note that the arguments to choose() are always integers with values
+ // known at compile time, never SCEV objects.
+ //
+ // The implementation avoids pointless extra computations when the two
+ // addrecs are of different length (mathematically, it's equivalent to
+ // an infinite stream of zeros on the right).
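+ //
+ // As one worked instance of the formula (a sketch, not a proof):
+ // {1,+,1}<L> * {1,+,1}<L> has the value (1+k)^2 = 1 + 2k + k^2 at
+ // iteration k, which is exactly {1,+,3,+,2}<L>, since
+ // 1 + 3*C(k,1) + 2*C(k,2) = 1 + 3k + k*(k-1) = 1 + 2k + k^2.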
+ bool OpsModified = false;
+ for (unsigned OtherIdx = Idx+1;
+ OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx) {
+ const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
+ continue;
+
+ bool Overflow = false;
+ Type *Ty = AddRec->getType();
+ bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+ SmallVector<const SCEV*, 7> AddRecOps;
+ for (int x = 0, xe = AddRec->getNumOperands() +
+ OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+ const SCEV *Term = getZero(Ty);
+ for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+ uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+ for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+ ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+ z < ze && !Overflow; ++z) {
+ uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+ uint64_t Coeff;
+ if (LargerThan64Bits)
+ Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+ else
+ Coeff = Coeff1*Coeff2;
+ const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+ const SCEV *Term1 = AddRec->getOperand(y-z);
+ const SCEV *Term2 = OtherAddRec->getOperand(z);
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+ }
+ }
+ AddRecOps.push_back(Term);
+ }
+ if (!Overflow) {
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = NewAddRec;
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ OpsModified = true;
+ AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+ if (!AddRec)
+ break;
+ }
+ }
+ if (OpsModified)
+ return getMulExpr(Ops);
+
+ // Otherwise we couldn't fold anything into this recurrence. Move on to
+ // the next one.
+ }
+
+ // Okay, it looks like we really DO need a mul expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+/// getUDivExpr - Get a canonical unsigned division expression, or something
+/// simpler if possible.
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ assert(getEffectiveSCEVType(LHS->getType()) ==
+ getEffectiveSCEVType(RHS->getType()) &&
+ "SCEVUDivExpr operand types don't match!");
+
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (RHSC->getValue()->equalsInt(1))
+ return LHS; // X udiv 1 --> X
+ // If the denominator is zero, the result of the udiv is undefined. Don't
+ // try to analyze it, because the resolution chosen here may differ from
+ // the resolution chosen in other parts of the compiler.
+ if (!RHSC->getValue()->isZero()) {
+ // Determine if the division can be folded into the operands of
+ // its left-hand side.
+ // TODO: Generalize this to non-constants by using known-bits information.
+ Type *Ty = LHS->getType();
+ unsigned LZ = RHSC->getAPInt().countLeadingZeros();
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
+ // For non-power-of-two values, effectively round the value up to the
+ // nearest power of two.
+ if (!RHSC->getAPInt().isPowerOf2())
+ ++MaxShiftAmt;
+ IntegerType *ExtTy =
+ IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
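+ // For example, {0,+,8}<L> udiv 4 folds to {0,+,2}<L> once the
+ // zero-extend comparison below shows the recurrence cannot wrap.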
+ const APInt &StepInt = Step->getAPInt();
+ const APInt &DivInt = RHSC->getAPInt();
+ if (!StepInt.urem(DivInt) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (const SCEV *Op : AR->operands())
+ Operands.push_back(getUDivExpr(Op, RHS));
+ return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
+ }
+ // Get a canonical UDivExpr for a recurrence.
+ // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
+ // We can currently only fold X%N if X is constant.
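+ // For example, {5,+,2}/4 rewrites the dividend to {4,+,2} (Y = 5 - 5%2);
+ // since 4 % 2 == 0, every value of the recurrence is congruent to the
+ // same remainder modulo the step, and subtracting that remainder never
+ // crosses a multiple of the divisor, so the quotient is unchanged.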
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
+ if (StartC && !DivInt.urem(StepInt) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ const APInt &StartInt = StartC->getAPInt();
+ const APInt &StartRem = StartInt.urem(StepInt);
+ if (StartRem != 0)
+ LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
+ AR->getLoop(), SCEV::FlagNW);
+ }
+ }
+ // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
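+ // For example, (4 * %x) udiv 2 folds to 2 * %x when the zero-extend
+ // comparison shows the multiply cannot wrap.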
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (const SCEV *Op : M->operands())
+ Operands.push_back(getZeroExtendExpr(Op, ExtTy));
+ if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
+ // Find an operand that's safely divisible.
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = M->getOperand(i);
+ const SCEV *Div = getUDivExpr(Op, RHSC);
+ if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
+ Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
+ M->op_end());
+ Operands[i] = Div;
+ return getMulExpr(Operands);
+ }
+ }
+ }
+ // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
+ SmallVector<const SCEV *, 4> Operands;
+ for (const SCEV *Op : A->operands())
+ Operands.push_back(getZeroExtendExpr(Op, ExtTy));
+ if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
+ Operands.clear();
+ for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+ const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
+ if (isa<SCEVUDivExpr>(Op) ||
+ getMulExpr(Op, RHS) != A->getOperand(i))
+ break;
+ Operands.push_back(Op);
+ }
+ if (Operands.size() == A->getNumOperands())
+ return getAddExpr(Operands);
+ }
+ }
+
+ // Fold if both operands are constant.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ Constant *LHSCV = LHSC->getValue();
+ Constant *RHSCV = RHSC->getValue();
+ return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+ RHSCV)));
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUDivExpr);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
+ LHS, RHS);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getAPInt().abs();
+ APInt B = C2->getAPInt().abs();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+/// getUDivExactExpr - Get a canonical unsigned division expression, or
+/// something simpler if possible. There is no representation for an exact udiv
+/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
+/// We can't do this when it's not exact because the udiv may be clearing bits.
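+/// For example, an exact udiv of (6 * %x) by 4 first divides out the common
+/// factor gcd(6, 4) == 2, leaving (3 * %x) /u 2, and an exact udiv of
+/// (4 * %x) by %x cancels the matching operand, leaving 4.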
+const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // TODO: we could try to find factors in all sorts of things, but for now we
+ // just deal with u/exact (multiply, constant). See SCEVDivision towards the
+ // end of this file for inspiration.
+
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
+ if (!Mul)
+ return getUDivExpr(LHS, RHS);
+
+ if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
+ // If the mulexpr multiplies by a constant, then that constant must be the
+ // first element of the mulexpr.
+ if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ if (LHSCst == RHSCst) {
+ SmallVector<const SCEV *, 2> Operands;
+ Operands.append(Mul->op_begin() + 1, Mul->op_end());
+ return getMulExpr(Operands);
+ }
+
+ // We can't just assume that LHSCst divides RHSCst cleanly; it could be
+ // that there's a factor provided by one of the other terms. We need to
+ // check.
+ APInt Factor = gcd(LHSCst, RHSCst);
+ if (!Factor.isIntN(1)) {
+ LHSCst =
+ cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
+ RHSCst =
+ cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
+ SmallVector<const SCEV *, 2> Operands;
+ Operands.push_back(LHSCst);
+ Operands.append(Mul->op_begin() + 1, Mul->op_end());
+ LHS = getMulExpr(Operands);
+ RHS = RHSCst;
+ Mul = dyn_cast<SCEVMulExpr>(LHS);
+ if (!Mul)
+ return getUDivExactExpr(LHS, RHS);
+ }
+ }
+ }
+
+ for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
+ if (Mul->getOperand(i) == RHS) {
+ SmallVector<const SCEV *, 2> Operands;
+ Operands.append(Mul->op_begin(), Mul->op_begin() + i);
+ Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
+ return getMulExpr(Operands);
+ }
+ }
+
+ return getUDivExpr(LHS, RHS);
+}
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
+ const Loop *L,
+ SCEV::NoWrapFlags Flags) {
+ SmallVector<const SCEV *, 4> Operands;
+ Operands.push_back(Start);
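+ // If the step is itself an addrec on the same loop, flatten it into the
+ // operand list below: e.g. getAddRecExpr(X, {S1,+,S2}<L>, L) folds to
+ // {X,+,S1,+,S2}<L>.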
+ if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
+ if (StepChrec->getLoop() == L) {
+ Operands.append(StepChrec->op_begin(), StepChrec->op_end());
+ return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
+ }
+
+ Operands.push_back(Step);
+ return getAddRecExpr(Operands, L, Flags);
+}
+
+/// getAddRecExpr - Get an add recurrence expression for the specified loop.
+/// Simplify the expression as much as possible.
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ if (Operands.size() == 1) return Operands[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
+ "SCEVAddRecExpr operand types don't match!");
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ assert(isLoopInvariant(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not loop-invariant!");
+#endif
+
+ if (Operands.back()->isZero()) {
+ Operands.pop_back();
+ return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
+ }
+
+ // It's tempting to call getMaxBackedgeTakenCount here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
+ Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
+
+ // Canonicalize nested AddRecs by nesting them in order of loop depth.
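+ // For example, {{X,+,Y}<Inner>,+,Z}<Outer>, where Outer contains Inner,
+ // becomes {{X,+,Z}<Outer>,+,Y}<Inner> when the loop-invariance checks
+ // below succeed.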
+ if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
+ const Loop *NestedLoop = NestedAR->getLoop();
+ if (L->contains(NestedLoop)
+ ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
+ : (!NestedLoop->contains(L) &&
+ DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
+ SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
+ Operands[0] = NestedAR->getStart();
+ // AddRecs require their operands be loop-invariant with respect to their
+ // loops. Don't perform this transformation if it would break this
+ // requirement.
+ bool AllInvariant = all_of(
+ Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
+
+ if (AllInvariant) {
+ // Create a recurrence for the outer loop with the same step size.
+ //
+ // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
+ // inner recurrence has the same property.
+ SCEV::NoWrapFlags OuterFlags =
+ maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
+
+ NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
+ AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
+ return isLoopInvariant(Op, NestedLoop);
+ });
+
+ if (AllInvariant) {
+ // Ok, both add recurrences are valid after the transformation.
+ //
+ // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
+ // the outer recurrence has the same property.
+ SCEV::NoWrapFlags InnerFlags =
+ maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
+ return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
+ }
+ }
+ // Reset Operands to its original state.
+ Operands[0] = NestedAR;
+ }
+ }
+
+ // Okay, it looks like we really DO need an addrec expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ ID.AddPointer(Operands[i]);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
+ std::uninitialized_copy(Operands.begin(), Operands.end(), O);
+ S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
+ O, Operands.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *
+ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
+ const SmallVectorImpl<const SCEV *> &IndexExprs,
+ bool InBounds) {
+ // getSCEV(Base)->getType() has the same address space as Base->getType()
+ // because SCEV::getType() preserves the address space.
+ Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
+ // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
+ // instruction to its SCEV, because the Instruction may be guarded by control
+ // flow and the no-overflow bits may not be valid for the expression in any
+ // context. This can be fixed similarly to how these flags are handled for
+ // adds.
+ SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+ const SCEV *TotalOffset = getZero(IntPtrTy);
+ // The address space is unimportant; the first thing we do with CurTy is
+ // get its element type.
+ Type *CurTy = PointerType::getUnqual(PointeeType);
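+ // As a sketch (assuming a DataLayout where i32 has a 4-byte alloc size
+ // and pointers are 8 bytes wide): a GEP of [10 x i32]* %p with indices
+ // (%i, %j) accumulates the offset 40 * %i + 4 * %j, with each index
+ // sign-extended to the 64-bit intptr type before scaling.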
+ for (const SCEV *IndexExpr : IndexExprs) {
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+ // For a struct, add the member offset.
+ ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
+ unsigned FieldNo = Index->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+
+ // Update CurTy to the type of the field at Index.
+ CurTy = STy->getTypeAtIndex(Index);
+ } else {
+ // Update CurTy to its element type.
+ CurTy = cast<SequentialType>(CurTy)->getElementType();
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
+ // Getelementptr indices are signed.
+ IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseExpr, TotalOffset, Wrap);
+}
+
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getSMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty smax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVSMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, &LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(
+ getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+ // If we have an smax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first SMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is an SMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedSMax = false;
+ while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(SMax->op_begin(), SMax->op_end());
+ DeletedSMax = true;
+ }
+
+ if (DeletedSMax)
+ return getSMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X smax Y smax Y --> X smax Y
+ // X smax Y --> X, if X is always greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced smax down to nothing!");
+
+ // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scSMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops;
+ Ops.push_back(LHS);
+ Ops.push_back(RHS);
+ return getUMaxExpr(Ops);
+}
+
+const SCEV *
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty umax!");
+ if (Ops.size() == 1) return Ops[0];
+#ifndef NDEBUG
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+ "SCEVUMaxExpr operand types don't match!");
+#endif
+
+ // Sort by complexity; this groups all similar expression types together.
+ GroupByComplexity(Ops, &LI);
+
+ // If there are any constants, fold them together.
+ unsigned Idx = 0;
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+ ++Idx;
+ assert(Idx < Ops.size());
+ while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+ // We found two constants, fold them together!
+ ConstantInt *Fold = ConstantInt::get(
+ getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
+ Ops[0] = getConstant(Fold);
+ Ops.erase(Ops.begin()+1); // Erase the folded element
+ if (Ops.size() == 1) return Ops[0];
+ LHSC = cast<SCEVConstant>(Ops[0]);
+ }
+
+ // If we are left with a constant minimum-int, strip it off.
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+ Ops.erase(Ops.begin());
+ --Idx;
+ } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+ // If we have an umax with a constant maximum-int, it will always be
+ // maximum-int.
+ return Ops[0];
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+ }
+
+ // Find the first UMax
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+ ++Idx;
+
+ // Check to see if one of the operands is a UMax. If so, expand its operands
+ // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) {
+ bool DeletedUMax = false;
+ while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+ Ops.erase(Ops.begin()+Idx);
+ Ops.append(UMax->op_begin(), UMax->op_end());
+ DeletedUMax = true;
+ }
+
+ if (DeletedUMax)
+ return getUMaxExpr(Ops);
+ }
+
+ // Okay, check to see if the same value occurs in the operand list twice. If
+ // so, delete one. Since we sorted the list, these values are required to
+ // be adjacent.
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ // X umax Y umax Y --> X umax Y
+ // X umax Y --> X, if X is always greater than or equal to Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+ --i; --e;
+ }
+
+ if (Ops.size() == 1) return Ops[0];
+
+ assert(!Ops.empty() && "Reduced umax down to nothing!");
+
+ // Okay, it looks like we really DO need a umax expr. Check to see if we
+ // already have one, otherwise create a new one.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUMaxExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+ O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
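+ // (~z == -1 - z reverses the signed order, so complementing the smax of
+ // the complements yields the smin; e.g. x = 3, y = 5 gives
+ // ~smax(-4, -6) == ~(-4) == 3 == smin(3, 5).)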
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
+ // We can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+ StructType *STy,
+ unsigned FieldNo) {
+ // We can bypass creating a target-independent
+ // constant expression and then folding it back into a ConstantInt.
+ // This is just a compile-time optimization.
+ return getConstant(
+ IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+ // Don't attempt to do anything other than create a SCEVUnknown object
+ // here. createSCEV only calls getUnknown after checking for all other
+ // interesting possibilities, and any other code that calls getUnknown
+ // is doing so in order to hide a value from SCEV canonicalization.
+
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUnknown);
+ ID.AddPointer(V);
+ void *IP = nullptr;
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
+//===----------------------------------------------------------------------===//
+// Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(Type *Ty) const {
+ // Integers and pointers are always SCEVable.
+ return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+ return getDataLayout().getTypeSizeInBits(Ty);
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
+ assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+ if (Ty->isIntegerTy())
+ return Ty;
+
+ // The only other supported type is pointer.
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+ return getDataLayout().getIntPtrType(Ty);
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+ return CouldNotCompute.get();
+}
+
+
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
+ // Helper class working with SCEVTraversal to figure out if a SCEV contains
+ // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
+ // is set iff we find such a SCEVUnknown.
+ //
+ struct FindInvalidSCEVUnknown {
+ bool FindOne;
+ FindInvalidSCEVUnknown() { FindOne = false; }
+ bool follow(const SCEV *S) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return false;
+ case scUnknown:
+ if (!cast<SCEVUnknown>(S)->getValue())
+ FindOne = true;
+ return false;
+ default:
+ return true;
+ }
+ }
+ bool isDone() const { return FindOne; }
+ };
+
+ FindInvalidSCEVUnknown F;
+ SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
+ ST.visitAll(S);
+
+ return !F.FindOne;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ const SCEV *S = getExistingSCEV(V);
+ if (S == nullptr) {
+ S = createSCEV(V);
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ }
+ return S;
+}
+
+const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+ ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+ if (I != ValueExprMap.end()) {
+ const SCEV *S = I->second;
+ if (checkValidity(S))
+ return S;
+ ValueExprMap.erase(I);
+ }
+ return nullptr;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+ Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ return getMulExpr(
+ V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+ Type *Ty = V->getType();
+ Ty = getEffectiveSCEVType(Ty);
+ const SCEV *AllOnes =
+ getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+ return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags) {
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getZero(LHS->getType());
+
+ // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+ // makes it so that we cannot make much use of NUW.
+ auto AddFlags = SCEV::FlagAnyWrap;
+ const bool RHSIsNotMinSigned =
+ !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ // Let M be the minimum representable signed value. Then (-1)*RHS
+ // signed-wraps if and only if RHS is M. That can happen even for
+ // a NSW subtraction because e.g. (-1)*M signed-wraps even though
+ // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+ // (-1)*RHS, we need to prove that RHS != M.
+ //
+ // If LHS is non-negative and we know that LHS - RHS does not
+ // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+ // either by proving that RHS > M or that LHS >= 0.
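+ //
+ // For instance, in i8 with M == -128: (-1)*(-128) wraps back to -128,
+ // while -1 - (-128) == 127 does not wrap.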
+ if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+ AddFlags = SCEV::FlagNSW;
+ }
+ }
+
+ // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
+ // RHS is NSW and LHS >= 0.
+ //
+ // The difficulty here is that the NSW flag may have been proven
+ // relative to a loop that is to be found in a recurrence in LHS and
+ // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
+ // larger scope than intended.
+ auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+ Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or zero extend with non-integer arguments!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+ return getTruncateExpr(V, Ty);
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or zero extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrZeroExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getZeroExtendExpr(V, Ty);
+}
+
+/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended. The conversion must not be narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or sign extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrSignExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended,
+/// it is extended with unspecified bits. The conversion must not be
+/// narrowing.
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot noop or any extend with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+ "getNoopOrAnyExtend cannot truncate!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getAnyExtendExpr(V, Ty);
+}
+
+/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. The conversion must not be widening.
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Cannot truncate or noop with non-integer arguments!");
+ assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
+ "getTruncateOrNoop cannot extend!");
+ if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+ return V; // No conversion
+ return getTruncateExpr(V, Ty);
+}
+
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getUMinFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umin operation
+/// with them.
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+ const SCEV *RHS) {
+ const SCEV *PromotedLHS = LHS;
+ const SCEV *PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMinExpr(PromotedLHS, PromotedRHS);
+}
+
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+ // A pointer operand may evaluate to a nonpointer expression, such as null.
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ return getPointerBase(Cast->getOperand());
+ } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ const SCEV *PtrOp = nullptr;
+ for (const SCEV *NAryOp : NAry->operands()) {
+ if (NAryOp->getType()->isPointerTy()) {
+ // Cannot find the base of an expression with multiple pointer operands.
+ if (PtrOp)
+ return V;
+ PtrOp = NAryOp;
+ }
+ }
+ if (!PtrOp)
+ return V;
+ return getPointerBase(PtrOp);
+ }
+ return V;
+}
+
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
+ // Push the def-use children onto the Worklist stack.
+ for (User *U : I->users())
+ Worklist.push_back(cast<Instruction>(U));
+}
+
+/// ForgetSymbolicName - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the ValueExprMap if they reference SymName. This is used during PHI
+/// resolution.
+void
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushDefUseChildren(PN, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ Visited.insert(PN);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I).second)
+ continue;
+
+ auto It = ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // Short-circuit the def-use traversal if the symbolic name
+ // ceases to appear in expressions.
+ if (Old != SymName && !hasOperand(Old, SymName))
+ continue;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, it's a PHI that's in the process of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(Old) ||
+ (I != PN && Old == SymName)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+namespace {
+class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
+ ScalarEvolution &SE) {
+ SCEVInitRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(Scev);
+ return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ }
+
+ SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ Valid = false;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ // Only allow AddRecExprs for this loop.
+ if (Expr->getLoop() == L)
+ return Expr->getStart();
+ Valid = false;
+ return Expr;
+ }
+
+ bool isValid() { return Valid; }
+
+private:
+ const Loop *L;
+ bool Valid;
+};
+
+class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, const Loop *L,
+ ScalarEvolution &SE) {
+ SCEVShiftRewriter Rewriter(L, SE);
+ const SCEV *Result = Rewriter.visit(Scev);
+ return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
+ }
+
+ SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
+ : SCEVRewriteVisitor(SE), L(L), Valid(true) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ // Only allow SCEVUnknowns that are loop-invariant with respect to L.
+ if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ Valid = false;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (Expr->getLoop() == L && Expr->isAffine())
+ return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
+ Valid = false;
+ return Expr;
+ }
+ bool isValid() { return Valid; }
+
+private:
+ const Loop *L;
+ bool Valid;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent())
+ return nullptr;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = nullptr;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = nullptr;
+ break;
+ }
+ }
+ if (BEValueV && StartValueV) {
+ // While we are analyzing this PHI node, handle its value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ // If the increment doesn't overflow, then neither the addrec nor
+ // the post-increment will overflow.
+ if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+ if (OBO->getOperand(0) == PN) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ }
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
+ Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+
+ // We cannot transfer nuw and nsw flags from subtraction
+ // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+ // for instance.
+ }
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ // Since the no-wrap flags are on the increment, they apply to the
+ // post-incremented value as well.
+ if (isLoopInvariant(Accum, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ return PHISCEV;
+ }
+ }
+ } else {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue,
+ // i really is an addrec evolution.
+ //
+ // We can generalize this, saying that i is the shifted value of BEValue
+ // by one iteration:
+ // PHI(f(0), f({1,+,1})) --> f({0,+,1})
+ const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
+ const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
+ if (Shifted != getCouldNotCompute() &&
+ Start != getCouldNotCompute()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+ if (Start == StartVal) {
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ ForgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+ return Shifted;
+ }
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+// Checks if the SCEV S is available at BB. S is considered available at BB
+// if S can be materialized at BB without introducing a fault.
+static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
+ BasicBlock *BB) {
+ struct CheckAvailable {
+ bool TraversalDone = false;
+ bool Available = true;
+
+ const Loop *L = nullptr; // The loop BB is in (can be nullptr)
+ BasicBlock *BB = nullptr;
+ DominatorTree &DT;
+
+ CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
+ : L(L), BB(BB), DT(DT) {}
+
+ bool setUnavailable() {
+ TraversalDone = true;
+ Available = false;
+ return false;
+ }
+
+ bool follow(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
+ case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+ // These expressions are available if their operands are.
+ return true;
+
+ case scAddRecExpr: {
+ // We allow add recurrences that are in the loop BB is in, or some
+ // outer loop. This guarantees availability because the value of the
+ // add recurrence at BB is simply the "current" value of the induction
+ // variable. We can relax this in the future; for instance an add
+ // recurrence on a sibling dominating loop is also available at BB.
+ const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
+ if (L && (ARLoop == L || ARLoop->contains(L)))
+ return true;
+
+ return setUnavailable();
+ }
+
+ case scUnknown: {
+ // For SCEVUnknown, we check for simple dominance.
+ const auto *SU = cast<SCEVUnknown>(S);
+ Value *V = SU->getValue();
+
+ if (isa<Argument>(V))
+ return false;
+
+ if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
+ return false;
+
+ return setUnavailable();
+ }
+
+ case scUDivExpr:
+ case scCouldNotCompute:
+ // We do not try to be smart about these at all.
+ return setUnavailable();
+ }
+ llvm_unreachable("switch should be fully covered!");
+ }
+
+ bool isDone() { return TraversalDone; }
+ };
+
+ CheckAvailable CA(L, BB, DT);
+ SCEVTraversal<CheckAvailable> ST(CA);
+
+ ST.visitAll(S);
+ return CA.Available;
+}
+
+// Try to match a control flow sequence that branches out at BI and merges back
+// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
+// match.
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
+ Value *&C, Value *&LHS, Value *&RHS) {
+ C = BI->getCondition();
+
+ BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
+ BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
+
+ if (!LeftEdge.isSingleEdge())
+ return false;
+
+ assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
+
+ Use &LeftUse = Merge->getOperandUse(0);
+ Use &RightUse = Merge->getOperandUse(1);
+
+ if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
+ LHS = LeftUse;
+ RHS = RightUse;
+ return true;
+ }
+
+ if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
+ LHS = RightUse;
+ RHS = LeftUse;
+ return true;
+ }
+
+ return false;
+}
+
+const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
+ if (PN->getNumIncomingValues() == 2) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+
+ // We don't want to break LCSSA, even in a SCEV expression tree.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
+ return nullptr;
+
+ // Try to match
+ //
+ // br %cond, label %left, label %right
+ // left:
+ // br label %merge
+ // right:
+ // br label %merge
+ // merge:
+ // V = phi [ %x, %left ], [ %y, %right ]
+ //
+ // as "select %cond, %x, %y"
+
+ BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
+ assert(IDom && "At least the entry block should dominate PN");
+
+ auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
+ Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
+
+ if (BI && BI->isConditional() &&
+ BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
+ IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
+ IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
+ return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
+ }
+
+ return nullptr;
+}
+
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+ if (const SCEV *S = createAddRecFromPHI(PN))
+ return S;
+
+ if (const SCEV *S = createNodeFromSelectLikePHI(PN))
+ return S;
+
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
+ if (LI.replacementPreservesLCSSAForm(PN, V))
+ return getSCEV(V);
+
+ // If it's not a loop phi, we can't handle it yet.
+ return getUnknown(PN);
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
+ Value *Cond,
+ Value *TrueVal,
+ Value *FalseVal) {
+ // Handle "constant" branch or select. This can occur for instance when a
+ // loop pass transforms an inner loop and moves on to process the outer loop.
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+
+ // Try to match some simple smax or umax patterns.
+ auto *ICI = dyn_cast<ICmpInst>(Cond);
+ if (!ICI)
+ return getUnknown(I);
+
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ switch (ICI->getPredicate()) {
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ // a >s b ? a+x : b+x -> smax(a, b)+x
+ // a >s b ? b+x : a+x -> smin(a, b)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
+ const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getSMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // a >u b ? a+x : b+x -> umax(a, b)+x
+ // a >u b ? b+x : a+x -> umin(a, b)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, RS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+ LDiff = getMinusSCEV(LA, RS);
+ RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMinExpr(LS, RS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n+x : 1+x -> umax(n, 1)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getOne(I->getType());
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, LS);
+ const SCEV *RDiff = getMinusSCEV(RA, One);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getOne(I->getType());
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *LA = getSCEV(TrueVal);
+ const SCEV *RA = getSCEV(FalseVal);
+ const SCEV *LDiff = getMinusSCEV(LA, One);
+ const SCEV *RDiff = getMinusSCEV(RA, LS);
+ if (LDiff == RDiff)
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return getUnknown(I);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
+///
+const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!Base->getType()->getPointerElementType()->isSized())
+ return getUnknown(GEP);
+
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+ IndexExprs.push_back(getSCEV(*Index));
+ return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs,
+ GEP->isInBounds());
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getAPInt().countTrailingZeros();
+
+ if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
+
+ if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
+ }
+
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // The result is the sum of all operands' results.
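+ // For example, if one operand is divisible by 4 and the other by 8, the
+ // product is divisible by 32 (2 + 3 trailing zeros, capped at BitWidth).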
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
+ for (unsigned i = 1, e = M->getNumOperands();
+ SumOpRes != BitWidth && i != e; ++i)
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
+ BitWidth);
+ return SumOpRes;
+ }
+
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+ for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+ // The result is the min of all operands' results.
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+ for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+ return MinOpRes;
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
+ nullptr, &DT);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
+ return 0;
+}
+
+/// GetRangeFromMetadata - Helper method to assign a range to V from
+/// metadata present in the IR.
+static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
+ return getConstantRangeFromMetadata(*MD);
+
+ return None;
+}
+
+/// getRange - Determine the range for a particular SCEV. If SignHint is
+/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
+/// with a "cleaner" unsigned (resp. signed) representation.
+///
+ConstantRange
+ScalarEvolution::getRange(const SCEV *S,
+ ScalarEvolution::RangeSignHint SignHint) {
+ DenseMap<const SCEV *, ConstantRange> &Cache =
+ SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
+ : SignedRanges;
+
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
+ if (I != Cache.end())
+ return I->second;
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return setRange(C, SignHint, ConstantRange(C->getAPInt()));
+
+ unsigned BitWidth = getTypeSizeInBits(S->getType());
+ ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+ // If the value has known zeros, the maximum value will have those known zeros
+ // as well.
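+ // For example, for an 8-bit value with TZ == 2, the unsigned range shrinks
+ // to [0, 253), since 252 is the largest 8-bit multiple of 4.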
+ uint32_t TZ = GetMinTrailingZeros(S);
+ if (TZ != 0) {
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ else
+ ConservativeResult = ConstantRange(
+ APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+ }
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ ConstantRange X = getRange(Add->getOperand(0), SignHint);
+ for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+ X = X.add(getRange(Add->getOperand(i), SignHint));
+ return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ ConstantRange X = getRange(Mul->getOperand(0), SignHint);
+ for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+ X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+ return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+ ConstantRange X = getRange(SMax->getOperand(0), SignHint);
+ for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+ X = X.smax(getRange(SMax->getOperand(i), SignHint));
+ return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+ ConstantRange X = getRange(UMax->getOperand(0), SignHint);
+ for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+ X = X.umax(getRange(UMax->getOperand(i), SignHint));
+ return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
+ }
+
+ if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ ConstantRange X = getRange(UDiv->getLHS(), SignHint);
+ ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+ return setRange(UDiv, SignHint,
+ ConservativeResult.intersectWith(X.udiv(Y)));
+ }
+
+ if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+ return setRange(ZExt, SignHint,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ }
+
+ if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+ ConstantRange X = getRange(SExt->getOperand(), SignHint);
+ return setRange(SExt, SignHint,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ }
+
+ if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+ ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+ return setRange(Trunc, SignHint,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ }
+
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If there's no unsigned wrap, the value will never be less than its
+ // initial value.
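+ // For example, an i8 {4,+,4}<nuw> recurrence yields the wrapped range
+ // [4, 0), i.e. {4, ..., 255}.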
+ if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+ if (!C->getValue()->isZero())
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
+
+ // If there's no signed wrap, and all the operands have the same sign or
+ // zero, the value won't ever change sign.
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+ bool AllNonNeg = true;
+ bool AllNonPos = true;
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+ if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+ }
+ if (AllNonNeg)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt(BitWidth, 0),
+ APInt::getSignedMinValue(BitWidth)));
+ else if (AllNonPos)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth),
+ APInt(BitWidth, 1)));
+ }
+
+ // TODO: non-affine addrec
+ if (AddRec->isAffine()) {
+ Type *Ty = AddRec->getType();
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+ getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
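+ // Widening to BitWidth * 2 + 1 bits keeps the arithmetic exact: the
+ // product of two BitWidth-bit quantities fits in 2 * BitWidth bits, and
+ // the final addition needs at most one extra bit.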
+
+ MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange ZExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
+
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ ConstantRange StepSRange = getSignedRange(Step);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+
+ ConstantRange StartURange = getUnsignedRange(Start);
+ ConstantRange EndURange =
+ StartURange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for unsigned overflow.
+ ConstantRange ZExtStartURange =
+ StartURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ ZExtEndURange) {
+ APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
+ EndURange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
+ EndURange.getUnsignedMax());
+ bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
+
+ ConstantRange StartSRange = getSignedRange(Start);
+ ConstantRange EndSRange =
+ StartSRange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for signed overflow. This must be done with ConstantRange
+ // arithmetic because we could be called from within the ScalarEvolution
+ // overflow checking code.
+ ConstantRange SExtStartSRange =
+ StartSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ SExtEndSRange) {
+ APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
+ EndSRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
+ EndSRange.getSignedMax());
+ bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
+ }
+ }
+
+ return setRange(AddRec, SignHint, ConservativeResult);
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
+ // Split here to avoid paying the compile-time cost of calling both
+ // computeKnownBits and ComputeNumSignBits. This restriction can be lifted
+ // if needed.
+ const DataLayout &DL = getDataLayout();
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
+ if (Ones != ~Zeros + 1)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ } else {
+ assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
+ "generalize as needed!");
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
+ if (NS > 1)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+ }
+
+ return setRange(U, SignHint, ConservativeResult);
+ }
+
+ return setRange(S, SignHint, ConservativeResult);
+}
+
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+ if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
+ const BinaryOperator *BinOp = cast<BinaryOperator>(V);
+
+ // Return early if there are no flags to propagate to the SCEV.
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+ if (BinOp->hasNoUnsignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ if (BinOp->hasNoSignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+ if (Flags == SCEV::FlagAnyWrap) {
+ return SCEV::FlagAnyWrap;
+ }
+
+ // Here we check that BinOp is in the header of the innermost loop
+ // containing BinOp, since we only deal with instructions in the loop
+ // header. The actual loop we need to check later will come from an add
+ // recurrence, but getting that requires computing the SCEV of the operands,
+ // which can be expensive. This check we can do cheaply to rule out some
+ // cases early.
+ Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent());
+ if (innermostContainingLoop == nullptr ||
+ innermostContainingLoop->getHeader() != BinOp->getParent())
+ return SCEV::FlagAnyWrap;
+
+ // Only proceed if we can prove that BinOp does not yield poison.
+ if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
+
+ // At this point we know that if V is executed, then it does not wrap
+ // according to at least one of NSW or NUW. If V is not executed, then we do
+ // not know if the calculation that V represents would wrap. Multiple
+ // instructions can map to the same SCEV. If we apply NSW or NUW from V to
+ // the SCEV, we must guarantee no wrapping for that SCEV also when it is
+ // derived from other instructions that map to the same SCEV. We cannot make
+ // that guarantee for cases where V is not executed. So we need to find the
+ // loop that V is considered in relation to and prove that V is executed for
+ // every iteration of that loop. That implies that the value that V
+ // calculates does not wrap anywhere in the loop, so then we can apply the
+ // flags to the SCEV.
+ //
+ // We check isLoopInvariant to disambiguate in case we are adding two
+ // recurrences from different loops, so that we know which loop to prove
+ // that V is executed in.
+ for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
+ const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ const int OtherOpIndex = 1 - OpIndex;
+ const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
+ if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
+ isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
+ return Flags;
+ }
+ }
+ return SCEV::FlagAnyWrap;
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value. Analyze
+/// the expression.
+///
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+
+ // Don't attempt to analyze instructions in blocks that aren't
+ // reachable. Such instructions don't matter, and they aren't required
+ // to obey basic rules for definitions dominating uses which this
+ // analysis depends on.
+ if (!DT.isReachableFromEntry(I->getParent()))
+ return getUnknown(V);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ else if (isa<ConstantPointerNull>(V))
+ return getZero(V->getType());
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
+ else
+ return getUnknown(V);
+
+ Operator *U = cast<Operator>(V);
+ switch (Opcode) {
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
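+ // For example, ((a + b) + c) + d is gathered into a single getAddExpr
+ // call on {d, c, b, a} rather than three nested ones.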
+ SmallVector<const SCEV *, 4> AddOps;
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ unsigned Opcode = U ? U->getOpcode() : 0;
+ if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
+ assert(Op != V && "V should be an add");
+ AddOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ AddOps.push_back(OpSCEV);
+ break;
+ }
+
+ // If a NUW or NSW flag can be applied to the SCEV for this
+ // addition, then compute the SCEV for this addition by itself
+ // with a separate call to getAddExpr. We need to do that
+ // instead of pushing the operands of the addition onto AddOps,
+ // since the flags are only known to apply to this particular
+ // addition - they may not apply to other additions that can be
+ // formed with operands from AddOps.
+ const SCEV *RHS = getSCEV(U->getOperand(1));
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+ else
+ AddOps.push_back(getAddExpr(LHS, RHS, Flags));
+ break;
+ }
+
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(RHS));
+ else
+ AddOps.push_back(RHS);
+ }
+ return getAddExpr(AddOps);
+ }
+
+ case Instruction::Mul: {
+ SmallVector<const SCEV *, 4> MulOps;
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ if (!U || U->getOpcode() != Instruction::Mul) {
+ assert(Op != V && "V should be a mul");
+ MulOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ MulOps.push_back(OpSCEV);
+ break;
+ }
+
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)), Flags));
+ break;
+ }
+
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ return getMulExpr(MulOps);
+ }
+ case Instruction::UDiv:
+ return getUDivExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+ case Instruction::Sub:
+ return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+ getNoWrapFlagsFromUB(U));
+ case Instruction::And:
+ // For an expression like x&255 that merely masks off the high bits,
+ // use zext(trunc(x)) as the SCEV expression.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ if (CI->isNullValue())
+ return getSCEV(U->getOperand(1));
+ if (CI->isAllOnesValue())
+ return getSCEV(U->getOperand(0));
+ const APInt &A = CI->getValue();
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+ // Use computeKnownBits to compute what ShrinkDemandedConstant
+ // knew about to reconstruct a low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned TZ = A.countTrailingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(),
+ 0, &AC, nullptr, &DT);
+
+ APInt EffectiveMask =
+ APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
+ if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
+ const SCEV *MulCount = getConstant(
+ ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+ return getMulExpr(
+ getZeroExtendExpr(
+ getTruncateExpr(
+ getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
+ IntegerType::get(getContext(), BitWidth - LZ - TZ)),
+ U->getType()),
+ MulCount);
+ }
+ }
+ break;
+
+ case Instruction::Or:
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
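+ // For example, X*4|1 is safe to model as X*4+1 because X*4 has at least
+ // two trailing zero bits and the constant 1 fits below them.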
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+ OldAR->getNoWrapFlags());
+ }
+ return S;
+ }
+ }
+ break;
+ case Instruction::Xor:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ // If the RHS of the xor is a signbit, then this is just an add.
+ // Instcombine turns add of signbit into xor as a strength reduction step.
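+ // For example, for i32: x ^ 0x80000000 equals x + 0x80000000, as the
+ // carry out of the sign bit is simply discarded.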
+ if (CI->getValue().isSignBit())
+ return getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)));
+
+ // If the RHS of xor is -1, then this is a not operation.
+ if (CI->isAllOnesValue())
+ return getNotSCEV(getSCEV(U->getOperand(0)));
+
+ // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+ // This is a variant of the check for xor with -1, and it handles
+ // the case where instcombine has trimmed non-demanded bits out
+ // of an xor with -1.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+ if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO->getOpcode() == Instruction::And &&
+ LCI->getValue() == CI->getValue())
+ if (const SCEVZeroExtendExpr *Z =
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ Type *UTy = U->getType();
+ const SCEV *Z0 = Z->getOperand();
+ Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = CI->getValue().trunc(Z0TySize);
+ if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ // Turn a shift left by a constant amount into a multiply.
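+ // For example, x << 3 is modeled as x * 8.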
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ // It is currently not resolved how to interpret NSW for left
+ // shift by BitWidth - 1, so we avoid applying flags in that
+ // case. Remove this check (or this comment) once the situation
+ // is resolved. See
+ // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+ // and http://reviews.llvm.org/D8890 .
+ auto Flags = SCEV::FlagAnyWrap;
+ if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
+ }
+ break;
+
+ case Instruction::LShr:
+ // Turn a logical shift right by a constant amount into an unsigned divide.
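+ // For example, x >>u 3 is modeled as x /u 8.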
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (SA->getValue().uge(BitWidth))
+ break;
+
+ Constant *X = ConstantInt::get(getContext(),
+ APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
+ return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ }
+ break;
+
+ case Instruction::AShr:
+ // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
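+ // For example, for i32: (x << 24) >>s 24 is modeled as
+ // sext(trunc x to i8) to i32.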
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+ if (L->getOpcode() == Instruction::Shl &&
+ L->getOperand(1) == U->getOperand(1)) {
+ uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
+
+ uint64_t Amt = BitWidth - CI->getZExtValue();
+ if (Amt == BitWidth)
+ return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ return
+ getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+ IntegerType::get(getContext(),
+ Amt)),
+ U->getType());
+ }
+ break;
+
+ case Instruction::Trunc:
+ return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::ZExt:
+ return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::SExt:
+ return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+ case Instruction::BitCast:
+ // BitCasts are no-op casts so we just eliminate the cast.
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+ return getSCEV(U->getOperand(0));
+ break;
+
+ // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+ // lead to pointer expressions which cannot safely be expanded to GEPs,
+ // because ScalarEvolution doesn't respect the GEP aliasing rules when
+ // simplifying integer expressions.
+
+ case Instruction::GetElementPtr:
+ return createNodeForGEP(cast<GEPOperator>(U));
+
+ case Instruction::PHI:
+ return createNodeForPHI(cast<PHINode>(U));
+
+ case Instruction::Select:
+ // U can also be a select constant expr, which we let fall through. Since
+ // createNodeForSelect only works for a condition that is an `ICmpInst`, and
+ // constant expressions cannot have instructions as operands, we'd have
+ // returned getUnknown for a select constant expression anyway.
+ if (isa<Instruction>(U))
+ return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
+ U->getOperand(1), U->getOperand(2));
+
+ default: // We cannot analyze this expression.
+ break;
+ }
+
+ return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Iteration Count Computation Code
+//
+
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripCount(L, ExitingBB);
+
+ // No trip count information for multiple exits.
+ return 0;
+}
+
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number of times that the loop header executes, because the loop may exit
+/// prematurely via another branch.
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
+ const SCEVConstant *ExitCount =
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
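+ // (A backedge-taken count of 0xFFFFFFFF passes the guard above, but its
+ // trip count, 2^32, wraps to 0 when truncated to 32 bits.)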
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripMultiple(L, ExitingBB);
+
+ // No trip multiple information for multiple exits.
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). It will also return
+/// 1 if the trip count is very large (>= 2^32).
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
+unsigned
+ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+ if (ExitCount == getCouldNotCompute())
+ return 1;
+
+ // Get the trip count from the BE count by adding 1.
+ const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
+ // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+ // to factor simple cases.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+ TCMul = Mul->getOperand(0);
+
+ const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+ if (!MulC)
+ return 1;
+
+ ConstantInt *Result = MulC->getValue();
+
+ // Guard against huge trip counts (this requires checking
+ // for zero to handle the case where the trip count == -1 and the
+ // addition wraps).
+ if (!Result || Result->getValue().getActiveBits() > 32 ||
+ Result->getValue().getActiveBits() == 0)
+ return 1;
+
+ return (unsigned)Result->getZExtValue();
+}
+
+/// getExitCount - Get the expression for the number of loop iterations for
+/// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
+/// return SCEVCouldNotCompute.
+const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+}
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
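+/// For example, a loop whose body executes 10 times has a backedge-taken
+/// count of 9.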
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).getExact(this);
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+ return getBackedgeTakenInfo(L).getMax(this);
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+ BasicBlock *Header = L->getHeader();
+
+ // Push all Loop-header PHIs onto the Worklist stack.
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+ // Initially insert an invalid entry for this loop. If the insertion
+ // succeeds, proceed to actually compute a backedge-taken count and
+ // update the value. The temporary CouldNotCompute value tells SCEV
+ // code elsewhere that it shouldn't attempt to request a new
+ // backedge-taken count, which could result in infinite recursion.
+ std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // computeBackedgeTakenCount may allocate memory for its result. Inserting it
+ // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
+ // must be cleared in this scope.
+ BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
+
+ if (Result.getExact(this) != getCouldNotCompute()) {
+ assert(isLoopInvariant(Result.getExact(this), L) &&
+ isLoopInvariant(Result.getMax(this), L) &&
+ "Computed backedge-taken count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+ }
+ else if (Result.getMax(this) == getCouldNotCompute() &&
+ isa<PHINode>(L->getHeader()->begin())) {
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
+ if (Result.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I).second)
+ continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, or it's a PHI that's in the process of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+ // count information isn't going to change anything. In the latter
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+ }
+
+ // Re-lookup the insert position, since the call to
+ // computeBackedgeTakenCount above could result in a
+ // recursive call to getBackedgeTakenInfo (on a different
+ // loop), which would invalidate the iterator computed
+ // earlier.
+ return BackedgeTakenCounts.find(L)->second = Result;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may affect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+ // Drop any stored trip count value.
+ DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
+ BackedgeTakenCounts.find(L);
+ if (BTCPos != BackedgeTakenCounts.end()) {
+ BTCPos->second.clear();
+ BackedgeTakenCounts.erase(BTCPos);
+ }
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I).second)
+ continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ forgetLoop(*I);
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may affect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I).second)
+ continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ forgetMemoizedResults(It->second);
+ ValueExprMap.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
+/// getExact - Get the exact loop backedge taken count considering all loop
+/// exits. A computable result can only be returned for loops with a single
+/// exit. Returning the minimum taken count among all exits is incorrect
+/// because one of the loop's exit limits may have been skipped. HowFarToZero
+/// assumes that the limit of each loop test is never skipped. This is a valid
+/// assumption as long as the loop exits via that test. For precise results, it
+/// is the caller's responsibility to specify the relevant loop exit using
+/// getExact(ExitingBlock, SE).
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ // If any exits were not computable, the loop is not computable.
+ if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
+
+ // We need exactly one computable exit.
+ if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
+ assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+
+ const SCEV *BECount = nullptr;
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != nullptr; ENT = ENT->getNextExit()) {
+
+ assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+
+ if (!BECount)
+ BECount = ENT->ExactNotTaken;
+ else if (BECount != ENT->ExactNotTaken)
+ return SE->getCouldNotCompute();
+ }
+ assert(BECount && "Invalid not taken count for loop exit");
+ return BECount;
+}
+
+/// getExact - Get the exact not taken count for this loop exit.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const {
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != nullptr; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExitingBlock == ExitingBlock)
+ return ENT->ExactNotTaken;
+ }
+ return SE->getCouldNotCompute();
+}
+
+/// getMax - Get the max backedge taken count for the loop.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+ return Max ? Max : SE->getCouldNotCompute();
+}
+
+bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
+ ScalarEvolution *SE) const {
+ if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ return true;
+
+ if (!ExitNotTaken.ExitingBlock)
+ return false;
+
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != nullptr; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExactNotTaken != SE->getCouldNotCompute()
+ && SE->hasOperand(ENT->ExactNotTaken, S)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
+/// computable exit into a persistent ExitNotTakenInfo array.
+ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
+ SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
+ bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+
+ if (!Complete)
+ ExitNotTaken.setIncomplete();
+
+ unsigned NumExits = ExitCounts.size();
+ if (NumExits == 0) return;
+
+ ExitNotTaken.ExitingBlock = ExitCounts[0].first;
+ ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+ if (NumExits == 1) return;
+
+ // Handle the rare case of multiple computable exits.
+ ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+
+ ExitNotTakenInfo *PrevENT = &ExitNotTaken;
+ for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
+ PrevENT->setNextExit(ENT);
+ ENT->ExitingBlock = ExitCounts[i].first;
+ ENT->ExactNotTaken = ExitCounts[i].second;
+ }
+}
+
+/// clear - Invalidate this result and free the ExitNotTakenInfo array.
+void ScalarEvolution::BackedgeTakenInfo::clear() {
+ ExitNotTaken.ExitingBlock = nullptr;
+ ExitNotTaken.ExactNotTaken = nullptr;
+ delete[] ExitNotTaken.getNextExit();
+}
+
+/// computeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+ bool CouldComputeBECount = true;
+ BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
+ const SCEV *MustExitMaxBECount = nullptr;
+ const SCEV *MayExitMaxBECount = nullptr;
+
+ // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
+ // and compute maxBECount.
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBB = ExitingBlocks[i];
+ ExitLimit EL = computeExitLimit(L, ExitBB);
+
+ // 1. For each exit that can be computed, add an entry to ExitCounts.
+ // CouldComputeBECount is true only if all exits can be computed.
+ if (EL.Exact == getCouldNotCompute())
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldComputeBECount = false;
+ else
+ ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
+
+ // 2. Derive the loop's MaxBECount from each exit's max number of
+ // non-exiting iterations. Partition the loop exits into two kinds:
+ // LoopMustExits and LoopMayExits.
+ //
+ // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
+ // is a LoopMayExit. If any computable LoopMustExit is found, then
+ // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
+ // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
+ // considered greater than any computable EL.Max.
+ if (EL.Max != getCouldNotCompute() && Latch &&
+ DT.dominates(ExitBB, Latch)) {
+ if (!MustExitMaxBECount)
+ MustExitMaxBECount = EL.Max;
+ else {
+ MustExitMaxBECount =
+ getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+ }
+ } else if (MayExitMaxBECount != getCouldNotCompute()) {
+ if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
+ MayExitMaxBECount = EL.Max;
+ else {
+ MayExitMaxBECount =
+ getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+ }
+ }
+ }
+ const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
+ (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
+ return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
+}
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+
+ // Okay, we've chosen an exiting block. See what condition causes us to exit
+ // at this block and remember the exit block and whether all other targets
+ // lead to the loop header.
+ bool MustExecuteLoopHeader = true;
+ BasicBlock *Exit = nullptr;
+ for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
+ SI != SE; ++SI)
+ if (!L->contains(*SI)) {
+ if (Exit) // Multiple exit successors.
+ return getCouldNotCompute();
+ Exit = *SI;
+ } else if (*SI != L->getHeader()) {
+ MustExecuteLoopHeader = false;
+ }
+
+ // At this point, we know we have a conditional branch that determines whether
+ // the loop is exited. However, we don't know if the branch is executed each
+ // time through the loop. If not, then the execution count of the branch will
+ // not be equal to the trip count of the loop.
+ //
+ // Currently we check for this by checking to see if the Exit branch goes to
+ // the loop header. If so, we know it will always execute the same number of
+ // times as the loop. We also handle the case where the exit block *is* the
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
+ if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
+ // The simple checks failed, try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitingBlock; BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return getCouldNotCompute();
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (const BasicBlock *PredSucc : PredTerm->successors()) {
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return getCouldNotCompute();
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return getCouldNotCompute();
+ }
+
+ bool IsOnlyExit = (L->getExitingBlock() != nullptr);
+ TerminatorInst *Term = ExitingBlock->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
+ assert(BI->isConditional() && "If unconditional, it can't be in loop!");
+ // Proceed to the next level to examine the exit condition expression.
+ return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+ BI->getSuccessor(1),
+ /*ControlsExit=*/IsOnlyExit);
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+ return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
+ /*ControlsExit=*/IsOnlyExit);
+
+ return getCouldNotCompute();
+}
+
+/// computeExitLimitFromCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+///
+/// @param ControlsExit is true if ExitCond directly controls the exit
+/// branch. In this case, we can assume that the loop exits only if the
+/// condition is true and can infer that failing to meet the condition prior to
+/// integer wraparound results in undefined behavior.
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB,
+ bool ControlsExit) {
+ // Check if the controlling expression for this loop is an And or Or.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ bool EitherMayExit = L->contains(TBB);
+ ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ControlsExit && !EitherMayExit);
+ ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ControlsExit && !EitherMayExit);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (EitherMayExit) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ } else {
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
+ }
+
+ return ExitLimit(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ bool EitherMayExit = L->contains(FBB);
+ ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ControlsExit && !EitherMayExit);
+ ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ControlsExit && !EitherMayExit);
+ const SCEV *BECount = getCouldNotCompute();
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (EitherMayExit) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
+ BECount = getCouldNotCompute();
+ else
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
+ else
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ } else {
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
+ }
+
+ return ExitLimit(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getZero(CI->getType());
+ }
+
+ // If it's not an integer or pointer comparison then compute it the hard way.
+ return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB,
+ bool ControlsExit) {
+
+ // If the condition was exit on true, convert the condition to exit on false
+ ICmpInst::Predicate Cond;
+ if (!L->contains(FBB))
+ Cond = ExitCond->getPredicate();
+ else
+ Cond = ExitCond->getInversePredicate();
+
+ // Handle common loops like: for (X = "string"; *X; ++X)
+ if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+ if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+ ExitLimit ItCnt =
+ computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
+ }
+
+ ExitLimit ShiftEL = computeShiftCompareExitLimit(
+ ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond);
+ if (ShiftEL.hasAnyInfo())
+ return ShiftEL;
+
+ const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+ const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+
+ // Try to evaluate any dependencies out of the loop.
+ LHS = getSCEVAtScope(LHS, L);
+ RHS = getSCEVAtScope(RHS, L);
+
+ // At this point, we would like to compute how many iterations of the
+ // loop the predicate will return true for these inputs.
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+ // If there is a loop-invariant, force it into the RHS.
+ std::swap(LHS, RHS);
+ Cond = ICmpInst::getSwappedPredicate(Cond);
+ }
+
+ // Simplify the operands before analyzing them.
+ (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
+ // If we have a comparison of a chrec against a constant, try to use value
+ // ranges to answer this query.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+ if (AddRec->getLoop() == L) {
+ // Form the constant range.
+ ConstantRange CompRange(
+ ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
+
+ const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+ if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+ }
+
+ switch (Cond) {
+ case ICmpInst::ICMP_NE: { // while (X != Y)
+ // Convert to: while (X-Y != 0)
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_EQ: { // while (X == Y)
+ // Convert to: while (X-Y == 0)
+ ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
+ if (EL.hasAnyInfo()) return EL;
+ break;
+ }
+ default:
+ break;
+ }
+ return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
+ SwitchInst *Switch,
+ BasicBlock *ExitingBlock,
+ bool ControlsExit) {
+ assert(!L->contains(ExitingBlock) && "Not an exiting block!");
+
+ // Give up if the exit is the default dest of a switch.
+ if (Switch->getDefaultDest() == ExitingBlock)
+ return getCouldNotCompute();
+
+ assert(L->contains(Switch->getDefaultDest()) &&
+ "Default case must not exit the loop!");
+ const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
+ const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
+
+ // while (X != Y) --> while (X-Y != 0)
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+ if (EL.hasAnyInfo())
+ return EL;
+
+ return getCouldNotCompute();
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+ ScalarEvolution &SE) {
+ const SCEV *InVal = SE.getConstant(C);
+ const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
+ assert(isa<SCEVConstant>(Val) &&
+ "Evaluation of SCEV at constant didn't fold correctly?");
+ return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// computeLoadConstantCompareExitLimit - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeLoadConstantCompareExitLimit(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+
+ if (LI->isVolatile()) return getCouldNotCompute();
+
+ // Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+ if (!GEP) return getCouldNotCompute();
+
+ // Make sure that it is really a constant global we are gepping, with an
+ // initializer, and make sure the first IDX is really 0.
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
+ !cast<Constant>(GEP->getOperand(1))->isNullValue())
+ return getCouldNotCompute();
+
+ // Okay, we allow one non-constant index into the GEP instruction.
+ Value *VarIdx = nullptr;
+ std::vector<Constant*> Indexes;
+ unsigned VarIdxNum = 0;
+ for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ Indexes.push_back(CI);
+ } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+ if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
+ VarIdx = GEP->getOperand(i);
+ VarIdxNum = i-2;
+ Indexes.push_back(nullptr);
+ }
+
+ // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
+ if (!VarIdx)
+ return getCouldNotCompute();
+
+ // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+ // Check to see if X is a loop variant variable value now.
+ const SCEV *Idx = getSCEV(VarIdx);
+ Idx = getSCEVAtScope(Idx, L);
+
+ // We can only recognize very limited forms of loop index expressions, in
+ // particular, only affine AddRec's like {C1,+,C2}.
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+ if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+ !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+ return getCouldNotCompute();
+
+ unsigned MaxSteps = MaxBruteForceIterations;
+ for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+ ConstantInt *ItCst = ConstantInt::get(
+ cast<IntegerType>(IdxExpr->getType()), IterationNum);
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+ // Form the GEP offset.
+ Indexes[VarIdxNum] = Val;
+
+ Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+ Indexes);
+ if (!Result) break; // Cannot compute!
+
+ // Evaluate the condition for this iteration.
+ Result = ConstantExpr::getICmp(predicate, Result, RHS);
+ if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
+ if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+ ++NumArrayLenItCounts;
+ return getConstant(ItCst); // Found terminating iteration!
+ }
+ }
+ return getCouldNotCompute();
+}
+
+ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
+ Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
+ ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
+ if (!RHS)
+ return getCouldNotCompute();
+
+ const BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return getCouldNotCompute();
+
+ const BasicBlock *Predecessor = L->getLoopPredecessor();
+ if (!Predecessor)
+ return getCouldNotCompute();
+
+ // Return true if V is of the form "LHS `shift_op` <positive constant>".
+ // Return LHS in OutLHS and shift_op in OutOpCode.
+ auto MatchPositiveShift =
+ [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
+
+ using namespace PatternMatch;
+
+ ConstantInt *ShiftAmt;
+ if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::LShr;
+ else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::AShr;
+ else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
+ OutOpCode = Instruction::Shl;
+ else
+ return false;
+
+ return ShiftAmt->getValue().isStrictlyPositive();
+ };
+
+ // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
+ //
+ // loop:
+ // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
+ // %iv.shifted = lshr i32 %iv, <positive constant>
+ //
+ // Return true on a successful match. Return the corresponding PHI node (%iv
+ // above) in PNOut and the opcode of the shift operation in OpCodeOut.
+ auto MatchShiftRecurrence =
+ [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
+ Optional<Instruction::BinaryOps> PostShiftOpCode;
+
+ {
+ Instruction::BinaryOps OpC;
+ Value *V;
+
+ // If we encounter a shift instruction, "peel off" the shift operation,
+ // and remember that we did so. Later when we inspect %iv's backedge
+ // value, we will make sure that the backedge value uses the same
+ // operation.
+ //
+ // Note: the peeled shift operation does not have to be the same
+ // instruction as the one feeding into the PHI's backedge value. We only
+ // really care about it being the same *kind* of shift instruction --
+ // that's all that is required for our later inferences to hold.
+ if (MatchPositiveShift(LHS, V, OpC)) {
+ PostShiftOpCode = OpC;
+ LHS = V;
+ }
+ }
+
+ PNOut = dyn_cast<PHINode>(LHS);
+ if (!PNOut || PNOut->getParent() != L->getHeader())
+ return false;
+
+ Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
+ Value *OpLHS;
+
+ return
+ // The backedge value for the PHI node must be a shift by a positive
+ // amount
+ MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
+
+ // of the PHI node itself
+ OpLHS == PNOut &&
+
+ // and the kind of shift should match the kind of shift we peeled
+ // off, if any.
+ (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
+ };
+
+ PHINode *PN;
+ Instruction::BinaryOps OpCode;
+ if (!MatchShiftRecurrence(LHS, PN, OpCode))
+ return getCouldNotCompute();
+
+ const DataLayout &DL = getDataLayout();
+
+ // The key rationale for this optimization is that for some kinds of shift
+ // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
+ // within a finite number of iterations. If the condition guarding the
+ // backedge (in the sense that the backedge is taken if the condition is true)
+ // is false for the value the shift recurrence stabilizes to, then we know
+ // that the backedge is taken only a finite number of times.
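+ //
+ // For example, given %iv.shifted = lshr i32 %iv, 1 and a backedge guarded
+ // by "%iv != 0": the recurrence stabilizes to 0 within 32 iterations, and
+ // 0 != 0 is false, so the bit width (32) is a safe upper bound on the
+ // backedge-taken count.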
+
+ ConstantInt *StableValue = nullptr;
+ switch (OpCode) {
+ default:
+ llvm_unreachable("Impossible case!");
+
+ case Instruction::AShr: {
+ // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
+ // bitwidth(K) iterations.
+ Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
+ bool KnownZero, KnownOne;
+ ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
+ Predecessor->getTerminator(), &DT);
+ auto *Ty = cast<IntegerType>(RHS->getType());
+ if (KnownZero)
+ StableValue = ConstantInt::get(Ty, 0);
+ else if (KnownOne)
+ StableValue = ConstantInt::get(Ty, -1, true);
+ else
+ return getCouldNotCompute();
+
+ break;
+ }
+ case Instruction::LShr:
+ case Instruction::Shl:
+ // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
+ // stabilize to 0 in at most bitwidth(K) iterations.
+ StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
+ break;
+ }
+
+ auto *Result =
+ ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
+ assert(Result->getType()->isIntegerTy(1) &&
+ "Otherwise cannot be an operand to a branch instruction");
+
+ if (Result->isZeroValue()) {
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *UpperBound =
+ getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
+ return ExitLimit(getCouldNotCompute(), UpperBound);
+ }
+
+ return getCouldNotCompute();
+}
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+ if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<LoadInst>(I))
+ return true;
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction())
+ return canConstantFoldCallTo(F);
+ return false;
+}
+
+/// Determine whether this instruction can constant evolve within this loop
+/// assuming its operands can all constant evolve.
+static bool canConstantEvolve(Instruction *I, const Loop *L) {
+ // An instruction outside of the loop can't be derived from a loop PHI.
+ if (!L->contains(I)) return false;
+
+ if (isa<PHINode>(I)) {
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return L->getHeader() == I->getParent();
+ }
+
+ // If we won't be able to constant fold this expression even if the operands
+ // are constants, bail early.
+ return CanConstantFold(I);
+}
+
+/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
+/// recursing through each instruction operand until reaching a loop header phi.
+static PHINode *
+getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
+ DenseMap<Instruction *, PHINode *> &PHIMap) {
+
+ // Otherwise, we can evaluate this instruction if all of its operands are
+ // constant or derived from a PHI node themselves.
+ PHINode *PHI = nullptr;
+ for (Value *Op : UseInst->operands()) {
+ if (isa<Constant>(Op)) continue;
+
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
+ if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
+
+ PHINode *P = dyn_cast<PHINode>(OpInst);
+ if (!P)
+ // If this operand is already visited, reuse the prior result.
+ // We may have P != PHI if this is the deepest point at which the
+ // inconsistent paths meet.
+ P = PHIMap.lookup(OpInst);
+ if (!P) {
+ // Recurse and memoize the results, whether a phi is found or not.
+ // This recursive call invalidates pointers into PHIMap.
+ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+ PHIMap[OpInst] = P;
+ }
+ if (!P)
+ return nullptr; // Not evolving from PHI
+ if (PHI && PHI != P)
+ return nullptr; // Evolving from multiple different PHIs.
+ PHI = P;
+ }
+ // This is an expression evolving from a constant PHI!
+ return PHI;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
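+///
+/// For example, given
+///
+/// %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+/// %iv.next = add i32 %iv, 1
+/// %cond = icmp ne i32 %iv.next, 100
+///
+/// getConstantEvolvingPHI(%cond, L) returns %iv, since every operand on the
+/// way to %cond is either a constant or derived from that one header PHI.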
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !canConstantEvolve(I, L)) return nullptr;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ return PN;
+
+ // Record non-constant instructions contained by the loop.
+ DenseMap<Instruction *, PHINode *> PHIMap;
+ return getConstantEvolvingPHIOperands(I, L, PHIMap);
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, const Loop *L,
+ DenseMap<Instruction *, Constant *> &Vals,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ // Convenient constant check, but redundant for recursive calls.
+ if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return nullptr;
+
+ if (Constant *C = Vals.lookup(I)) return C;
+
+ // An instruction inside the loop depends on a value outside the loop that we
+ // weren't given a mapping for, or a value such as a call inside the loop.
+ if (!canConstantEvolve(I, L)) return nullptr;
+
+ // An unmapped PHI can be due to a branch or another loop inside this loop,
+ // or due to this not being the initial iteration through a loop where we
+ // couldn't compute the evolution of this particular PHI last time.
+ if (isa<PHINode>(I)) return nullptr;
+
+ std::vector<Constant*> Operands(I->getNumOperands());
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
+ if (!Operand) {
+ Operands[i] = dyn_cast<Constant>(I->getOperand(i));
+ if (!Operands[i]) return nullptr;
+ continue;
+ }
+ Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
+ Vals[Operand] = C;
+ if (!C) return nullptr;
+ Operands[i] = C;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], DL, TLI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Operands[0], DL);
+ }
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
+ TLI);
+}
+
+
+// If every incoming value to PN except the one for BB is a specific Constant,
+// return that, else return nullptr.
+static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
+ Constant *IncomingVal = nullptr;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ continue;
+
+ auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (!CurrentVal)
+ return nullptr;
+
+ if (IncomingVal != CurrentVal) {
+ if (IncomingVal)
+ return nullptr;
+ IncomingVal = CurrentVal;
+ }
+ }
+
+ return IncomingVal;
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
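+///
+/// For example, for %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] with
+/// %iv.next = add i32 %iv, 3 and a backedge-taken count of 4, the exit
+/// value of %iv is 12: the PHI's value after four trips around the
+/// backedge.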
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+ const APInt &BEs,
+ const Loop *L) {
+ auto I = ConstantEvolutionLoopExitValue.find(PN);
+ if (I != ConstantEvolutionLoopExitValue.end())
+ return I->second;
+
+ // Too many iterations to brute-force; not going to evaluate it.
+ if (BEs.ugt(MaxBruteForceIterations))
+ return ConstantEvolutionLoopExitValue[PN] = nullptr;
+
+ Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return nullptr;
+
+ for (auto &I : *Header) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI) break;
+ auto *StartCST = getOtherIncomingValue(PHI, Latch);
+ if (!StartCST) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return RetVal = nullptr;
+
+ Value *BEValue = PN->getIncomingValueForBlock(Latch);
+
+ // Execute the loop symbolically to determine the exit value.
+ if (BEs.getActiveBits() >= 32)
+ return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
+
+ unsigned NumIterations = BEs.getZExtValue(); // must be in range
+ unsigned IterationNum = 0;
+ const DataLayout &DL = getDataLayout();
+ for (; ; ++IterationNum) {
+ if (IterationNum == NumIterations)
+ return RetVal = CurrentIterVals[PN]; // Got exit value!
+
+ // Compute the value of the PHIs for the next iteration.
+ // EvaluateExpression adds non-phi values to the CurrentIterVals map.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+ Constant *NextPHI =
+ EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
+ if (!NextPHI)
+ return nullptr; // Couldn't evaluate!
+ NextIterVals[PN] = NextPHI;
+
+ bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
+
+ // Also evaluate the other PHI nodes. However, we don't get to stop if we
+ // cease to be able to evaluate one of them or if they stop evolving,
+ // because that doesn't necessarily prevent us from computing PN.
+ SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
+ for (const auto &I : CurrentIterVals) {
+ PHINode *PHI = dyn_cast<PHINode>(I.first);
+ if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
+ PHIsToCompute.emplace_back(PHI, I.second);
+ }
+ // We use two distinct loops because EvaluateExpression may invalidate any
+ // iterators into CurrentIterVals.
+ for (const auto &I : PHIsToCompute) {
+ PHINode *PHI = I.first;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (!NextPHI) { // Not already computed.
+ Value *BEValue = PHI->getIncomingValueForBlock(Latch);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
+ }
+ if (NextPHI != I.second)
+ StoppedEvolving = false;
+ }
+
+ // If all entries in CurrentIterVals == NextIterVals then we can stop
+ // iterating; the loop can't continue to change.
+ if (StoppedEvolving)
+ return RetVal = CurrentIterVals[PN];
+
+ CurrentIterVals.swap(NextIterVals);
+ }
+}
+
+const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
+ PHINode *PN = getConstantEvolvingPHI(Cond, L);
+ if (!PN) return getCouldNotCompute();
+
+ // If the loop is canonicalized, the PHI will have exactly two entries.
+ // That's the only form we support here.
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch && "Should follow from NumIncomingValues == 2!");
+
+ for (auto &I : *Header) {
+ PHINode *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+ auto *StartCST = getOtherIncomingValue(PHI, Latch);
+ if (!StartCST) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return getCouldNotCompute();
+
+ // Okay, we found a PHI node that defines the trip count of this loop. Execute
+ // the loop symbolically to determine when the condition gets a value of
+ // "ExitWhen".
+ unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ const DataLayout &DL = getDataLayout();
+ for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
+ auto *CondVal = dyn_cast_or_null<ConstantInt>(
+ EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
+
+ // Couldn't symbolically evaluate.
+ if (!CondVal) return getCouldNotCompute();
+
+ if (CondVal->getValue() == uint64_t(ExitWhen)) {
+ ++NumBruteForceTripCountsComputed;
+ return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+ }
+
+ // Update all the PHI nodes for the next iteration.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+
+ // Create a list of which PHIs we need to compute. We want to do this before
+ // calling EvaluateExpression on them because that may invalidate iterators
+ // into CurrentIterVals.
+ SmallVector<PHINode *, 8> PHIsToCompute;
+ for (const auto &I : CurrentIterVals) {
+ PHINode *PHI = dyn_cast<PHINode>(I.first);
+ if (!PHI || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(PHI);
+ }
+ for (PHINode *PHI : PHIsToCompute) {
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (NextPHI) continue; // Already computed!
+
+ Value *BEValue = PHI->getIncomingValueForBlock(Latch);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
+ }
+ CurrentIterVals.swap(NextIterVals);
+ }
+
+ // Too many iterations were needed to evaluate.
+ return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program. The L value specifies a loop
+/// nest to evaluate the expression at, where null means the top-level
+/// scope (outside all loops) and a non-null loop means the scope
+/// immediately inside that loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
+ ValuesAtScopes[V];
+ // Check to see if we've folded this expression at this loop before.
+ for (auto &LS : Values)
+ if (LS.first == L)
+ return LS.second ? LS.second : V;
+
+ Values.emplace_back(L, nullptr);
+
+ // Otherwise compute it.
+ const SCEV *C = computeSCEVAtScope(V, L);
+ for (auto &LS : reverse(ValuesAtScopes[V]))
+ if (LS.first == L) {
+ LS.second = C;
+ break;
+ }
+ return C;
+}
+
+/// This builds up a Constant using the ConstantExpr interface. That way, we
+/// will return Constants for objects which aren't represented by a
+/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
+/// Returns NULL if the SCEV isn't representable as a Constant.
+static Constant *BuildConstantFromSCEV(const SCEV *V) {
+ switch (static_cast<SCEVTypes>(V->getSCEVType())) {
+ case scCouldNotCompute:
+ case scAddRecExpr:
+ break;
+ case scConstant:
+ return cast<SCEVConstant>(V)->getValue();
+ case scUnknown:
+ return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+ return ConstantExpr::getSExt(CastOp, SS->getType());
+ break;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+ return ConstantExpr::getZExt(CastOp, SZ->getType());
+ break;
+ }
+ case scTruncate: {
+ const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+ return ConstantExpr::getTrunc(CastOp, ST->getType());
+ break;
+ }
+ case scAddExpr: {
+ const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
+ for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+ if (!C2) return nullptr;
+
+ // First pointer!
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
+ std::swap(C, C2);
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
+
+ // Don't bother trying to sum two pointers. We probably can't
+ // statically compute a load that results from it anyway.
+ if (C2->getType()->isPointerTy())
+ return nullptr;
+
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
+ C2 = ConstantExpr::getIntegerCast(
+ C2, Type::getInt32Ty(C->getContext()), true);
+ C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
+ } else
+ C = ConstantExpr::getAdd(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scMulExpr: {
+ const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+ // Don't bother with pointers at all.
+ if (C->getType()->isPointerTy()) return nullptr;
+ for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+ if (!C2 || C2->getType()->isPointerTy()) return nullptr;
+ C = ConstantExpr::getMul(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+ if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+ if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+ if (LHS->getType() == RHS->getType())
+ return ConstantExpr::getUDiv(LHS, RHS);
+ break;
+ }
+ case scSMaxExpr:
+ case scUMaxExpr:
+ break; // TODO: smax, umax.
+ }
+ return nullptr;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+ if (isa<SCEVConstant>(V)) return V;
+
+ // If this instruction is evolved from a constant-evolving PHI, compute the
+ // exit value from the loop without using SCEVs.
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+ if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+ const Loop *LI = this->LI[I->getParent()];
+ if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ if (const SCEVConstant *BTCC =
+ dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV =
+ getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // Okay, this is an expression that we cannot symbolically evaluate
+ // into a SCEV. Check to see if it's possible to symbolically evaluate
+ // the arguments into constants, and if so, try to constant propagate the
+ // result. This is particularly useful for computing loop exit values.
+ if (CanConstantFold(I)) {
+ SmallVector<Constant *, 4> Operands;
+ bool MadeImprovement = false;
+ for (Value *Op : I->operands()) {
+ if (Constant *C = dyn_cast<Constant>(Op)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ // If any of the operands is non-constant and if they are
+ // non-integer and non-pointer, don't even try to analyze them
+ // with scev techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ const SCEV *OrigV = getSCEV(Op);
+ const SCEV *OpV = getSCEVAtScope(OrigV, L);
+ MadeImprovement |= OrigV != OpV;
+
+ Constant *C = BuildConstantFromSCEV(OpV);
+ if (!C) return V;
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
+ }
+
+ // Check to see if getSCEVAtScope actually made an improvement.
+ if (MadeImprovement) {
+ Constant *C = nullptr;
+ const DataLayout &DL = getDataLayout();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], DL, &TLI);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
+ } else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
+ DL, &TLI);
+ if (!C) return V;
+ return getSCEV(C);
+ }
+ }
+ }
+
+ // This is some other type of SCEVUnknown, just return it.
+ return V;
+ }
+
+ if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ if (OpAtScope != Comm->getOperand(i)) {
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+ Comm->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+
+ for (++i; i != e; ++i) {
+ OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+ NewOps.push_back(OpAtScope);
+ }
+ if (isa<SCEVAddExpr>(Comm))
+ return getAddExpr(NewOps);
+ if (isa<SCEVMulExpr>(Comm))
+ return getMulExpr(NewOps);
+ if (isa<SCEVSMaxExpr>(Comm))
+ return getSMaxExpr(NewOps);
+ if (isa<SCEVUMaxExpr>(Comm))
+ return getUMaxExpr(NewOps);
+ llvm_unreachable("Unknown commutative SCEV type!");
+ }
+ }
+ // If we got here, all operands are loop invariant.
+ return Comm;
+ }
+
+ if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
+ const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
+ const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
+ if (LHS == Div->getLHS() && RHS == Div->getRHS())
+ return Div; // must be loop invariant
+ return getUDivExpr(LHS, RHS);
+ }
+
+ // If this is a loop recurrence for a loop that does not contain L, then we
+ // are dealing with the final value computed by the loop.
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
+ // First, attempt to evaluate each operand.
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+ if (OpAtScope == AddRec->getOperand(i))
+ continue;
+
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+ AddRec->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+ const SCEV *FoldedRec =
+ getAddRecExpr(NewOps, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+ // The addrec may be folded to a nonrecurrence, for example, if the
+ // induction variable is multiplied by zero after constant folding. Go
+ // ahead and return the folded value.
+ if (!AddRec)
+ return FoldedRec;
+ break;
+ }
+
+ // If the scope is outside the addrec's loop, evaluate it by using the
+ // loop exit value of the addrec.
+ if (!AddRec->getLoop()->contains(L)) {
+ // To evaluate this recurrence, we need to know how many times the AddRec
+ // loop iterates. Compute this now.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
+ if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
+
+ // Then, evaluate the AddRec.
+ return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
+ }
+
+ return AddRec;
+ }
+
+ if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getZeroExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getSignExtendExpr(Op, Cast->getType());
+ }
+
+ if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
+ const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
+ if (Op == Cast->getOperand())
+ return Cast; // must be loop invariant
+ return getTruncateExpr(Op, Cast->getType());
+ }
+
+ llvm_unreachable("Unknown SCEV type!");
+}
+
+/// getSCEVAtScope - This is a convenience function which does
+/// getSCEVAtScope(getSCEV(V), L).
+const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
+ return getSCEVAtScope(getSCEV(V), L);
+}
+
+/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
+/// following equation:
+///
+/// A * X = B (mod N)
+///
+/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
+/// A and B isn't important.
+///
+/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
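+///
+/// A worked example in i8 (N = 2^8): to solve 6 * X = 10 (mod 256), compute
+/// D = gcd(6, 256) = 2, A/D = 3, B/D = 5, and N/D = 128. The multiplicative
+/// inverse of 3 modulo 128 is 43 (3 * 43 = 129 = 1 (mod 128)), so the
+/// minimum root is X = 43 * 5 (mod 128) = 87; indeed 6 * 87 = 522 = 10
+/// (mod 256).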
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+ ScalarEvolution &SE) {
+ uint32_t BW = A.getBitWidth();
+ assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(A != 0 && "A must be non-zero.");
+
+ // 1. D = gcd(A, N)
+ //
+ // The gcd of A and N may have only one prime factor: 2. The number of
+ // trailing zeros in A is the multiplicity of that factor.
+ uint32_t Mult2 = A.countTrailingZeros();
+ // D = 2^Mult2
+
+ // 2. Check if B is divisible by D.
+ //
+ // B is divisible by D if and only if the multiplicity of prime factor 2 for B
+ // is not less than multiplicity of this prime factor for D.
+ if (B.countTrailingZeros() < Mult2)
+ return SE.getCouldNotCompute();
+
+ // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
+ // modulo (N / D).
+ //
+ // (N / D) may need BW+1 bits in its representation. Hence, we'll use this
+ // bit width during computations.
+ APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
+ APInt Mod(BW + 1, 0);
+ Mod.setBit(BW - Mult2); // Mod = N / D
+ APInt I = AD.multiplicativeInverse(Mod);
+
+ // 4. Compute the minimum unsigned root of the equation:
+ // I * (B / D) mod (N / D)
+ APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
+
+ // The result is guaranteed to be less than 2^BW so we may truncate it to BW
+ // bits.
+ return SE.getConstant(Result.trunc(BW));
+}
+
+/// SolveQuadraticEquation - Find the roots of the quadratic equation for the
+/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which
+/// might be the same) or two SCEVCouldNotCompute objects.
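+///
+/// At iteration X the chrec {L,+,M,+,N} evaluates to
+/// L + M*X + N*(X*(X-1)/2) = (N/2)*X^2 + (M - N/2)*X + L, which is where
+/// the A = N/2, B = M - N/2, C = L polynomial coefficients in the body
+/// come from.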
+///
+static std::pair<const SCEV *,const SCEV *>
+SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
+ const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
+ const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
+ const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+
+ // We currently can only solve this if the coefficients are constants.
+ if (!LC || !MC || !NC) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ uint32_t BitWidth = LC->getAPInt().getBitWidth();
+ const APInt &L = LC->getAPInt();
+ const APInt &M = MC->getAPInt();
+ const APInt &N = NC->getAPInt();
+ APInt Two(BitWidth, 2);
+ APInt Four(BitWidth, 4);
+
+ {
+ using namespace APIntOps;
+ const APInt& C = L;
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ // The B coefficient is M-N/2
+ APInt B(M);
+ B -= sdiv(N,Two);
+
+ // The A coefficient is N/2
+ APInt A(N.sdiv(Two));
+
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm(B);
+ SqrtTerm *= B;
+ SqrtTerm -= Four * (A * C);
+
+ if (SqrtTerm.isNegative()) {
+ // The loop is provably infinite.
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal(SqrtTerm.sqrt());
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB(-B);
+ APInt TwoA(A << 1);
+ if (TwoA.isMinValue()) {
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(SE.getConstant(Solution1),
+ SE.getConstant(Solution2));
+ } // end APIntOps namespace
+}
+
+/// HowFarToZero - Return the number of times a backedge comparing the specified
+/// value to zero will execute. If not computable, return CouldNotCompute.
+///
+/// This is only used for loops with an "x != y" exit test. The exit condition
+/// is now expressed as a single expression, V = x-y. So the exit test is
+/// effectively V != 0. We know and take advantage of the fact that this
+/// expression is only used in a comparison-with-zero context.
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
+ // If the value is a constant
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ // If the value is already zero, the branch will execute zero times.
+ if (C->getValue()->isZero()) return C;
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+ if (!AddRec || AddRec->getLoop() != L)
+ return getCouldNotCompute();
+
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(AddRec, *this);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1 && R2) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB =
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+ R1->getValue(),
+ R2->getValue()))) {
+ if (!CB->getZExtValue())
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
+ if (Val->isZero())
+ return R1; // We found a quadratic root!
+ }
+ }
+ return getCouldNotCompute();
+ }
+
+ // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine())
+ return getCouldNotCompute();
+
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
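+ //
+ // For example, for the affine addrec {10,+,-2}, the smallest N with
+ // 10 + (-2)*N = 0 (mod 2^BW) is N = 5, so the backedge executes 5 times.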
+
+ // Get the initial value for the loop.
+ const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ // For now we handle only constant steps.
+ //
+ // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
+ // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
+ // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
+ // We have not yet seen any such cases.
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+ if (!StepC || StepC->getValue()->equalsInt(0))
+ return getCouldNotCompute();
+
+ // For positive steps (counting up until unsigned overflow):
+ // N = -Start/Step (as unsigned)
+ // For negative steps (counting down to zero):
+ // N = Start/-Step
+ // First compute the unsigned distance from zero in the direction of Step.
+ bool CountDown = StepC->getAPInt().isNegative();
+ const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+
+ // Handle unitary steps, which cannot wrap around.
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+ // N = Distance (as unsigned)
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+ ConstantRange CR = getUnsignedRange(Start);
+ const SCEV *MaxBECount;
+ if (!CountDown && CR.getUnsignedMin().isMinValue())
+ // When counting up, the worst starting value is 1, not 0.
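+ // (If Start were 0 the backedge would never be taken, while Start == 1
+ // gives the maximum of 2^BW - 1 backedges before wrapping back to 0.)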
+ MaxBECount = CR.getUnsignedMax().isMinValue()
+ ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+ : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+ else
+ MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+ : -CR.getUnsignedMin());
+ return ExitLimit(Distance, MaxBECount);
+ }
+
+ // As a special case, handle the instance where Step is a positive power of
+ // two. In this case, determining whether Step divides Distance evenly can be
+ // done by counting and comparing the number of trailing zeros of Step and
+ // Distance.
+ if (!CountDown) {
+ const APInt &StepV = StepC->getAPInt();
+ // StepV.isPowerOf2() returns true if StepV is a positive power of two. It
+ // also returns true if StepV is maximally negative (e.g., INT_MIN), but that
+ // case is not handled as this code is guarded by !CountDown.
+ if (StepV.isPowerOf2() &&
+ GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) {
+ // Here we've constrained the equation to be of the form
+ //
+ // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0)
+ //
+ // where we're operating on a W bit wide integer domain and k is
+ // non-negative. The smallest unsigned solution for X is the trip count.
+ //
+ // (0) is equivalent to:
+ //
+ // 2^(N + k) * Distance' - 2^N * X = L * 2^W
+ // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N
+ // <=> 2^k * Distance' - X = L * 2^(W - N)
+ // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1)
+ //
+ // The smallest X satisfying (1) is unsigned remainder of dividing the LHS
+ // by 2^(W - N).
+ //
+ // <=> X = 2^k * Distance' URem 2^(W - N) ... (2)
+ //
+ // E.g. say we're solving
+ //
+ // 2 * Val = 2 * X (in i8) ... (3)
+ //
+ // then from (2), we get X = Val URem i8 128 (k = 0 in this case).
+ //
+ // Note: It is tempting to solve (3) by setting X = Val, but Val is not
+ // necessarily the smallest unsigned value of X that satisfies (3).
+ // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3)
+ // is i8 1, not i8 -127.
+
+ const auto *ModuloResult = getUDivExactExpr(Distance, Step);
+
+ // Since SCEV does not have a URem node, we construct one using a truncate
+ // and a zero extend.
+
+ unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros();
+ auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
+ auto *WideTy = Distance->getType();
+
+ return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+ }
+ }
+
+ // If the condition controls loop exit (the loop exits only if the expression
+ // is true) and the addition is no-wrap we can use unsigned divide to
+ // compute the backedge count. In this case, the step may not divide the
+ // distance, but we don't care because if the condition is "missed" the loop
+ // will have undefined behavior due to wrapping.
+ if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+ const SCEV *Exact =
+ getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ return ExitLimit(Exact, Exact);
+ }
+
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),
+ *this);
+ return getCouldNotCompute();
+}
+
+/// HowFarToNonZero - Return the number of times a backedge checking the
+/// specified value for nonzero will execute. If not computable, return
+/// CouldNotCompute
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ // Loops that look like: while (X == 0) are very strange indeed. We don't
+ // handle them yet except for the trivial case. This could be expanded in the
+ // future as needed.
+
+ // If the value is a constant, check to see if it is known to be non-zero
+ // already. If so, the backedge will execute zero times.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+ if (!C->getValue()->isNullValue())
+ return getZero(C->getType());
+ return getCouldNotCompute(); // Otherwise it will loop infinitely.
+ }
+
+ // We could implement others, but I really doubt anyone writes loops like
+ // this, and if they did, they would already be constant folded.
+ return getCouldNotCompute();
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, or a null pair if no such block
+/// is found.
+///
+std::pair<BasicBlock *, BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+ // If the block has a unique predecessor, then there is no path from the
+ // predecessor to the block that does not go through the direct edge
+ // from the predecessor to the block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ return std::make_pair(Pred, BB);
+
+ // A loop's header is defined to be a block that dominates the loop.
+ // If the header has a unique predecessor outside the loop, it must be
+ // a block that has exactly one successor that can reach the loop.
+ if (Loop *L = LI.getLoopFor(BB))
+ return std::make_pair(L->getLoopPredecessor(), L->getHeader());
+
+ return std::pair<BasicBlock *, BasicBlock *>();
+}
+
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal; however, for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
+ // Not all instructions that are "identical" compute the same value. For
+ // instance, two distinct alloca instructions allocating the same type are
+ // identical and do not read memory; but compute distinct values.
+ return A->isIdenticalTo(B) &&
+ (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
+ };
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (ComputesEqualValues(AI, BI))
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+ const SCEV *&LHS, const SCEV *&RHS,
+ unsigned Depth) {
+ bool Changed = false;
+
+ // If we hit the max recursion limit bail out.
+ if (Depth >= 3)
+ return false;
+
+ // Canonicalize a constant to the right side.
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+ // Check for both operands constant.
+ if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+ if (ConstantExpr::getICmp(Pred,
+ LHSC->getValue(),
+ RHSC->getValue())->isNullValue())
+ goto trivially_false;
+ else
+ goto trivially_true;
+ }
+ // Otherwise swap the operands to put the constant on the right.
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+
+ // If we're comparing an addrec with a value which is loop-invariant in the
+ // addrec's loop, put the addrec on the left. Also make a dominance check,
+ // as both operands could be addrecs loop-invariant in each other's loop.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
+ const Loop *L = AR->getLoop();
+ if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ Changed = true;
+ }
+ }
+
+ // If there's a constant operand, canonicalize comparisons with boundary
+ // cases, and canonicalize *-or-equal comparisons to regular comparisons.
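+ // For example, "X u>= 1" becomes "X != 0", "X u>= C" becomes "X u> C-1",
+ // and "X u<= C" becomes "X u< C+1", except at boundary values of C, which
+ // fold to equalities or to trivially true/false comparisons.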
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
+ const APInt &RA = RC->getAPInt();
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+ if (!RA)
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+ if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+ if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+ ME->getOperand(0)->isAllOnesValue()) {
+ RHS = AE->getOperand(1);
+ LHS = ME->getOperand(1);
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SGT;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_true;
+
+ Pred = ICmpInst::ICMP_SLT;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (RA.isMinValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (RA.isMaxValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (RA.isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA + 1).isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA + 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMaxSignedValue()) goto trivially_false;
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (RA.isMaxSignedValue()) {
+ Pred = ICmpInst::ICMP_NE;
+ Changed = true;
+ break;
+ }
+ if ((RA - 1).isMinSignedValue()) {
+ Pred = ICmpInst::ICMP_EQ;
+ RHS = getConstant(RA - 1);
+ Changed = true;
+ break;
+ }
+ if (RA.isMinSignedValue()) goto trivially_false;
+ break;
+ }
+ }
+
+ // Check for obvious equality.
+ if (HasSameValue(LHS, RHS)) {
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ goto trivially_true;
+ if (ICmpInst::isFalseWhenEqual(Pred))
+ goto trivially_false;
+ }
+
+ // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
+ // adding or subtracting 1 from one of the operands.
+ switch (Pred) {
+ case ICmpInst::ICMP_SLE:
+ if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SLT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNSW);
+ Pred = ICmpInst::ICMP_SGT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
+ Pred = ICmpInst::ICMP_ULT;
+ Changed = true;
+ }
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
+ SCEV::FlagNUW);
+ Pred = ICmpInst::ICMP_UGT;
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // TODO: More simplifications are possible here.
+
+ // Recursively simplify until we either hit a recursion limit or nothing
+ // changes.
+ if (Changed)
+ return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+
+ return Changed;
+
+trivially_true:
+ // Return 0 == 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_EQ;
+ return true;
+
+trivially_false:
+ // Return 0 != 0.
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = ICmpInst::ICMP_NE;
+ return true;
+}
+
+bool ScalarEvolution::isKnownNegative(const SCEV *S) {
+ return getSignedRange(S).getSignedMax().isNegative();
+}
+
+bool ScalarEvolution::isKnownPositive(const SCEV *S) {
+ return getSignedRange(S).getSignedMin().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
+ return !getSignedRange(S).getSignedMin().isNegative();
+}
+
+bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
+ return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+}
+
+bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
+ return isKnownNegative(S) || isKnownPositive(S);
+}
+
+bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Canonicalize the inputs first.
+ (void)SimplifyICmpOperands(Pred, LHS, RHS);
+
+ // If LHS or RHS is an addrec, check to see if the condition is true in
+ // every iteration of the loop.
+ // If LHS and RHS are both addrec, both conditions must be true in
+ // every iteration of the loop.
+ const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+ bool LeftGuarded = false;
+ bool RightGuarded = false;
+ if (LAR) {
+ const Loop *L = LAR->getLoop();
+ if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
+ if (!RAR) return true;
+ LeftGuarded = true;
+ }
+ }
+ if (RAR) {
+ const Loop *L = RAR->getLoop();
+ if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
+ if (!LAR) return true;
+ RightGuarded = true;
+ }
+ }
+ if (LeftGuarded && RightGuarded)
+ return true;
+
+ if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
+ return true;
+
+ // Otherwise see what can be done with known constant ranges.
+ return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred,
+ bool &Increasing) {
+ bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing);
+
+#ifndef NDEBUG
+ // Verify an invariant: inverting the predicate should turn a monotonically
+ // increasing change to a monotonically decreasing one, and vice versa.
+ bool IncreasingSwapped;
+ bool ResultSwapped = isMonotonicPredicateImpl(
+ LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped);
+
+ assert(Result == ResultSwapped && "should be able to analyze both!");
+ if (ResultSwapped)
+ assert(Increasing == !IncreasingSwapped &&
+ "monotonicity should flip as we flip the predicate");
+#endif
+
+ return Result;
+}
+
+bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred,
+ bool &Increasing) {
+
+ // A zero step value for LHS means the induction variable is essentially a
+ // loop invariant value. We don't really depend on the predicate actually
+ // flipping from false to true (for increasing predicates, and the other way
+ // around for decreasing predicates); all we care about is that *if* the
+ // predicate changes then it only changes from false to true.
+ //
+ // A zero step value in itself is not very useful, but there may be places
+ // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
+ // as general as possible.
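+ //
+ // For example, for a <nuw> addrec {0,+,1}, the predicate "AR u< N" can
+ // only change from true to false as the loop advances (Increasing ==
+ // false), while "AR u>= N" can only change from false to true
+ // (Increasing == true).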
+
+ switch (Pred) {
+ default:
+ return false; // Conservative answer
+
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (!LHS->getNoWrapFlags(SCEV::FlagNUW))
+ return false;
+
+ Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
+ return true;
+
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ if (!LHS->getNoWrapFlags(SCEV::FlagNSW))
+ return false;
+
+ const SCEV *Step = LHS->getStepRecurrence(*this);
+
+ if (isKnownNonNegative(Step)) {
+ Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
+ return true;
+ }
+
+ if (isKnownNonPositive(Step)) {
+ Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
+ return true;
+ }
+
+ return false;
+ }
+
+ }
+
+ llvm_unreachable("switch has default clause!");
+}
+
+bool ScalarEvolution::isLoopInvariantPredicate(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
+ const SCEV *&InvariantRHS) {
+
+ // If there is a loop-invariant, force it into the RHS, otherwise bail out.
+ if (!isLoopInvariant(RHS, L)) {
+ if (!isLoopInvariant(LHS, L))
+ return false;
+
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!ArLHS || ArLHS->getLoop() != L)
+ return false;
+
+ bool Increasing;
+ if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
+ return false;
+
+ // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
+ // true as the loop iterates, and the backedge is control dependent on
+ // "ArLHS `Pred` RHS" == true then we can reason as follows:
+ //
+ // * if the predicate was false in the first iteration then the predicate
+ // is never evaluated again, since the loop exits without taking the
+ // backedge.
+ // * if the predicate was true in the first iteration then it will
+ // continue to be true for all future iterations since it is
+ // monotonically increasing.
+ //
+ // For both the above possibilities, we can replace the loop varying
+ // predicate with its value on the first iteration of the loop (which is
+ // loop invariant).
+ //
+ // A similar reasoning applies for a monotonically decreasing predicate, by
+ // replacing true with false and false with true in the above two bullets.
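+ //
+ // For example, if the backedge is guarded by the increasing predicate
+ // "{5,+,1}<nsw> s>= %n", then the predicate holds on every iteration
+ // exactly when it holds on the first one, so it can be replaced by the
+ // loop-invariant predicate "5 s>= %n".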
+
+ auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
+
+ if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
+ return false;
+
+ InvariantPred = Pred;
+ InvariantLHS = ArLHS->getStart();
+ InvariantRHS = RHS;
+ return true;
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ if (HasSameValue(LHS, RHS))
+ return ICmpInst::isTrueWhenEqual(Pred);
+
+ // This code is split out from isKnownPredicate because it is called from
+ // within isLoopEntryGuardedByCond.
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_SGT:
+ std::swap(LHS, RHS);
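+ // fall through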
+ case ICmpInst::ICMP_SLT: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
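+ // fall through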
+ case ICmpInst::ICMP_SLE: {
+ ConstantRange LHSRange = getSignedRange(LHS);
+ ConstantRange RHSRange = getSignedRange(RHS);
+ if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+ return true;
+ if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ std::swap(LHS, RHS);
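+ // fall through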
+ case ICmpInst::ICMP_ULT: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
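+ // fall through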
+ case ICmpInst::ICMP_ULE: {
+ ConstantRange LHSRange = getUnsignedRange(LHS);
+ ConstantRange RHSRange = getUnsignedRange(RHS);
+ if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+ return true;
+ if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+ return false;
+ break;
+ }
+ case ICmpInst::ICMP_NE: {
+ if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+ return true;
+ if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+ return true;
+
+ const SCEV *Diff = getMinusSCEV(LHS, RHS);
+ if (isKnownNonZero(Diff))
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_EQ:
+ // The check at the top of the function catches the case where
+ // the values are known to be equal.
+ break;
+ }
+ return false;
+}
+
+bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+
+ // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
+ // Return Y via OutY.
+ auto MatchBinaryAddToConst =
+ [this](const SCEV *Result, const SCEV *X, APInt &OutY,
+ SCEV::NoWrapFlags ExpectedFlags) {
+ const SCEV *NonConstOp, *ConstOp;
+ SCEV::NoWrapFlags FlagsPresent;
+
+ if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
+ !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
+ return false;
+
+ OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
+ return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
+ };
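+ // For example, MatchBinaryAddToConst((%x + 7)<nsw>, %x, C, SCEV::FlagNSW)
+ // succeeds and sets C to 7 (SCEV sorts the constant operand first).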
+
+ APInt C;
+
+ switch (Pred) {
+ default:
+ break;
+
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
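+ // fall through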
+ case ICmpInst::ICMP_SLE:
+ // X s<= (X + C)<nsw> if C >= 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
+ return true;
+
+ // (X + C)<nsw> s<= X if C <= 0
+ if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
+ !C.isStrictlyPositive())
+ return true;
+ break;
+
+ case ICmpInst::ICMP_SGT:
+ std::swap(LHS, RHS);
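+ // fall through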
+ case ICmpInst::ICMP_SLT:
+ // X s< (X + C)<nsw> if C > 0
+ if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
+ C.isStrictlyPositive())
+ return true;
+
+ // (X + C)<nsw> s< X if C < 0
+ if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS) {
+ if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
+ return false;
+
+ // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting
+ // on the stack can result in exponential time complexity.
+ SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
+
+ // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
+ //
+ // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
+ // isKnownPredicate. isKnownPredicate is more powerful, but also more
+ // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
+ // interesting cases seen in practice. We can consider "upgrading" L >= 0 to
+ // use isKnownPredicate later if needed.
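+ //
+ // For example, if %len is known non-negative, we can prove
+ // "{1,+,1}<nsw> u< %len" by proving the two signed facts
+ // "{1,+,1}<nsw> s>= 0" and "{1,+,1}<nsw> s< %len" instead.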
+ return isKnownNonNegative(RHS) &&
+ isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
+ isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS. This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return true;
+
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ BranchInst *LoopContinuePredicate =
+ dyn_cast<BranchInst>(Latch->getTerminator());
+ if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
+ isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
+ return true;
+
+ // We don't want more than one activation of the following loops on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
+
+ // See if we can exploit a trip count to prove the predicate.
+ const auto &BETakenInfo = getBackedgeTakenInfo(L);
+ const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
+ if (LatchBECount != getCouldNotCompute()) {
+ // We know that Latch branches back to the loop header exactly
+ // LatchBECount times. This means the backedge condition at Latch is
+ // equivalent to "{0,+,1} u< LatchBECount".
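+ // For example, if the latch is known to execute exactly 16 times, the
+ // counter {0,+,1} satisfies "{0,+,1} u< 16" whenever the backedge is
+ // taken, and that fact may already imply the predicate we were asked
+ // about.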
+ Type *Ty = LatchBECount->getType();
+ auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
+ const SCEV *LoopCounter =
+ getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
+ if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
+ LatchBECount))
+ return true;
+ }
+
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &AssumeVH : AC.assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *CI = cast<CallInst>(AssumeVH);
+ if (!DT.dominates(CI, Latch->getTerminator()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
+
+ // If the loop is not reachable from the entry block, we risk running into an
+ // infinite loop as we walk up into the dom tree. These loops do not matter
+ // anyway, so we just return a conservative answer when we see them.
+ if (!DT.isReachableFromEntry(L->getHeader()))
+ return false;
+
+ for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+ DTN != HeaderDTN; DTN = DTN->getIDom()) {
+
+ assert(DTN && "should reach the loop header before reaching the root!");
+
+ BasicBlock *BB = DTN->getBlock();
+ BasicBlock *PBB = BB->getSinglePredecessor();
+ if (!PBB)
+ continue;
+
+ BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!ContinuePredicate || !ContinuePredicate->isConditional())
+ continue;
+
+ Value *Condition = ContinuePredicate->getCondition();
+
+ // If we have an edge `E` within the loop body that dominates the only
+ // latch, the condition guarding `E` also guards the backedge. This
+ // reasoning works only for loops with a single latch.
+
+ BasicBlockEdge DominatingEdge(PBB, BB);
+ if (DominatingEdge.isSingleEdge()) {
+ // We're constructively (and conservatively) enumerating edges within the
+ // loop body that dominate the latch. The dominator tree better agree
+ // with us on this:
+ assert(DT.dominates(DominatingEdge, Latch) && "should be!");
+
+ if (isImpliedCond(Pred, LHS, RHS, Condition,
+ BB != ContinuePredicate->getSuccessor(0)))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
+/// by a conditional between LHS and RHS. This is used to help avoid max
+/// expressions in loop trip counts, and to eliminate casts.
+bool
+ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // Interpret a null as meaning no loop, where there is obviously no guard
+ // (interprocedural conditions notwithstanding).
+ if (!L) return false;
+
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
+ // Starting at the loop predecessor, climb up the predecessor chain, as long
+ // as there are predecessors that can be found that have unique successors
+ // leading to the original header.
+ for (std::pair<BasicBlock *, BasicBlock *>
+ Pair(L->getLoopPredecessor(), L->getHeader());
+ Pair.first;
+ Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
+
+ BranchInst *LoopEntryPredicate =
+ dyn_cast<BranchInst>(Pair.first->getTerminator());
+ if (!LoopEntryPredicate ||
+ LoopEntryPredicate->isUnconditional())
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
+ LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ return true;
+ }
+
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &AssumeVH : AC.assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *CI = cast<CallInst>(AssumeVH);
+ if (!DT.dominates(CI, L->getHeader()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
+
+ return false;
+}
+
+namespace {
+/// RAII wrapper to prevent recursive application of isImpliedCond.
+/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
+/// currently evaluating isImpliedCond.
+struct MarkPendingLoopPredicate {
+ Value *Cond;
+ DenseSet<Value*> &LoopPreds;
+ bool Pending;
+
+ MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
+ : Cond(C), LoopPreds(LP) {
+ Pending = !LoopPreds.insert(Cond).second;
+ }
+ ~MarkPendingLoopPredicate() {
+ if (!Pending)
+ LoopPreds.erase(Cond);
+ }
+};
+} // end anonymous namespace
+
+/// isImpliedCond - Test whether the condition described by Pred, LHS,
+/// and RHS is true whenever the given Cond value evaluates to true.
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
+ bool Inverse) {
+ MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
+ if (Mark.Pending)
+ return false;
+
+ // Recursively handle And and Or conditions.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
+ if (BO->getOpcode() == Instruction::And) {
+ if (!Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ } else if (BO->getOpcode() == Instruction::Or) {
+ if (Inverse)
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
+ }
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
+ if (!ICI) return false;
+
+ // Now that we have found a conditional branch that dominates the loop or
+ // controls the loop latch, check to see if it is the comparison we are
+ // looking for.
+ ICmpInst::Predicate FoundPred;
+ if (Inverse)
+ FoundPred = ICI->getInversePredicate();
+ else
+ FoundPred = ICI->getPredicate();
+
+ const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
+ const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
+
+ return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
+}
+
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS,
+ ICmpInst::Predicate FoundPred,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ // Balance the types.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ LHS = getSignExtendExpr(LHS, FoundLHS->getType());
+ RHS = getSignExtendExpr(RHS, FoundLHS->getType());
+ } else {
+ LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
+ RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
+ }
+ } else if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(FoundPred)) {
+ FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
+ } else {
+ FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
+ FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
+ }
+ }
+
+ // Canonicalize the query to match the way instcombine will have
+ // canonicalized the comparison.
+ if (SimplifyICmpOperands(Pred, LHS, RHS))
+ if (LHS == RHS)
+ return CmpInst::isTrueWhenEqual(Pred);
+ if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
+ if (FoundLHS == FoundRHS)
+ return CmpInst::isFalseWhenEqual(FoundPred);
+
+ // Check to see if we can make the LHS or RHS match.
+ if (LHS == FoundRHS || RHS == FoundLHS) {
+ if (isa<SCEVConstant>(RHS)) {
+ std::swap(FoundLHS, FoundRHS);
+ FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
+ } else {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ }
+
+ // Check whether the found predicate is the same as the desired predicate.
+ if (FoundPred == Pred)
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
+ // Check whether swapping the found predicate makes it the same as the
+ // desired predicate.
+ if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
+ if (isa<SCEVConstant>(RHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
+ else
+ return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
+ RHS, LHS, FoundLHS, FoundRHS);
+ }
+
+ // An unsigned comparison is equivalent to the signed comparison when both
+ // operands are non-negative.
+ if (CmpInst::isUnsigned(FoundPred) &&
+ CmpInst::getSignedPredicate(FoundPred) == Pred &&
+ isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
+ // Check if we can make progress by sharpening ranges.
+ if (FoundPred == ICmpInst::ICMP_NE &&
+ (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
+
+ const SCEVConstant *C = nullptr;
+ const SCEV *V = nullptr;
+
+ if (isa<SCEVConstant>(FoundLHS)) {
+ C = cast<SCEVConstant>(FoundLHS);
+ V = FoundRHS;
+ } else {
+ C = cast<SCEVConstant>(FoundRHS);
+ V = FoundLHS;
+ }
+
+ // The guarding predicate tells us that C != V. If the known range
+ // of V is [C, t), we can sharpen the range to [C + 1, t). The
+ // range we consider has to correspond to the same signedness as the
+ // predicate we're interested in folding.
+
+ APInt Min = ICmpInst::isSigned(Pred) ?
+ getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
+
+ if (Min == C->getAPInt()) {
+ // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
+ // This is true even if (Min + 1) wraps around -- in case of
+ // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
+
+ APInt SharperMin = Min + 1;
+
+ switch (Pred) {
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE:
+ // We know V `Pred` SharperMin. If this implies LHS `Pred`
+ // RHS, we're done.
+ if (isImpliedCondOperands(Pred, LHS, RHS, V,
+ getConstant(SharperMin)))
+ return true;
+
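+ // fall through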
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ // We know from the range information that (V `Pred` Min ||
+ // V == Min). We know from the guarding condition that !(V
+ // == Min). This gives us
+ //
+ // V `Pred` Min || V == Min && !(V == Min)
+ // => V `Pred` Min
+ //
+ // If V `Pred` Min implies LHS `Pred` RHS, we're done.
+
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
+ return true;
+
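+ // fall through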
+ default:
+ // No change
+ break;
+ }
+ }
+ }
+
+ // Check whether the actual condition is beyond sufficient.
+ if (FoundPred == ICmpInst::ICMP_EQ)
+ if (ICmpInst::isTrueWhenEqual(Pred))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+ if (Pred == ICmpInst::ICMP_NE)
+ if (!ICmpInst::isTrueWhenEqual(FoundPred))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ // Otherwise assume the worst.
+ return false;
+}
+
+bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
+ const SCEV *&L, const SCEV *&R,
+ SCEV::NoWrapFlags &Flags) {
+ const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
+ if (!AE || AE->getNumOperands() != 2)
+ return false;
+
+ L = AE->getOperand(0);
+ R = AE->getOperand(1);
+ Flags = AE->getNoWrapFlags();
+ return true;
+}
+
+bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
+ const SCEV *More,
+ APInt &C) {
+ // We avoid subtracting expressions here because this function is usually
+ // fairly deep in the call stack (i.e. is called many times).
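+ //
+ // For example, computeConstantDifference(%x, (%x + 5)) sets C to 5, as
+ // does computeConstantDifference({2,+,%s}<%L>, {7,+,%s}<%L>).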
+
+ if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
+ const auto *LAR = cast<SCEVAddRecExpr>(Less);
+ const auto *MAR = cast<SCEVAddRecExpr>(More);
+
+ if (LAR->getLoop() != MAR->getLoop())
+ return false;
+
+ // We look at affine expressions only; not for correctness but to keep
+ // getStepRecurrence cheap.
+ if (!LAR->isAffine() || !MAR->isAffine())
+ return false;
+
+ if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
+ return false;
+
+ Less = LAR->getStart();
+ More = MAR->getStart();
+
+ // fall through
+ }
+
+ if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
+ const auto &M = cast<SCEVConstant>(More)->getAPInt();
+ const auto &L = cast<SCEVConstant>(Less)->getAPInt();
+ C = M - L;
+ return true;
+ }
+
+ const SCEV *L, *R;
+ SCEV::NoWrapFlags Flags;
+ if (splitBinaryAdd(Less, L, R, Flags))
+ if (const auto *LC = dyn_cast<SCEVConstant>(L))
+ if (R == More) {
+ C = -(LC->getAPInt());
+ return true;
+ }
+
+ if (splitBinaryAdd(More, L, R, Flags))
+ if (const auto *LC = dyn_cast<SCEVConstant>(L))
+ if (R == Less) {
+ C = LC->getAPInt();
+ return true;
+ }
+
+ return false;
+}
+
+bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS) {
+ if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
+ return false;
+
+ const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!AddRecLHS)
+ return false;
+
+ const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
+ if (!AddRecFoundLHS)
+ return false;
+
+ // We'd like to let SCEV reason about control dependencies, so we constrain
+ // both inequalities to be about add recurrences on the same loop. This
+ // way we can use isLoopEntryGuardedByCond later.
+
+ const Loop *L = AddRecFoundLHS->getLoop();
+ if (L != AddRecLHS->getLoop())
+ return false;
+
+ // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
+ //
+ // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
+ // ... (2)
+ //
+ // Informal proof for (2), assuming (1) [*]:
+ //
+ // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
+ //
+ // Then
+ //
+ // FoundLHS s< FoundRHS s< INT_MIN - C
+ // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
+ // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
+ // <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
+ // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
+ // <=> FoundLHS + C s< FoundRHS + C
+ //
+ // [*]: (1) can be proved by ruling out overflow.
+ //
+ // [**]: This can be proved by analyzing all the four possibilities:
+ // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
+ // (A s>= 0, B s>= 0).
+ //
+ // Note:
+ // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
+ // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
+ // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
+ // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
+ // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
+ // C)".
+
+ APInt LDiff, RDiff;
+ if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
+ !computeConstantDifference(FoundRHS, RHS, RDiff) ||
+ LDiff != RDiff)
+ return false;
+
+ if (LDiff == 0)
+ return true;
+
+ APInt FoundRHSLimit;
+
+ if (Pred == CmpInst::ICMP_ULT) {
+ FoundRHSLimit = -RDiff;
+ } else {
+ assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
+ FoundRHSLimit =
+ APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+ }
+
+ // Try to prove (1) or (2), as needed.
+ return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
+ getConstant(FoundRHSLimit));
+}
+
+/// isImpliedCondOperands - Test whether the condition described by Pred,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+/// and FoundRHS is true.
+bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
+ return isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ FoundLHS, FoundRHS) ||
+ // ~x < ~y --> x > y
+ isImpliedCondOperandsHelper(Pred, LHS, RHS,
+ getNotSCEV(FoundRHS),
+ getNotSCEV(FoundLHS));
+}
+
+
+/// If Expr computes ~A, return A; otherwise return nullptr.
+static const SCEV *MatchNotExpr(const SCEV *Expr) {
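+ // SCEV canonicalizes ~A as (-1 + (-1 * A)), so match an add whose first
+ // operand is all-ones and whose second operand is a multiply by all-ones.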
+ const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (!Add || Add->getNumOperands() != 2 ||
+ !Add->getOperand(0)->isAllOnesValue())
+ return nullptr;
+
+ const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
+ if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+ !AddRHS->getOperand(0)->isAllOnesValue())
+ return nullptr;
+
+ return AddRHS->getOperand(1);
+}
+
+
+/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
+template<typename MaxExprType>
+static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
+ const SCEV *Candidate) {
+ const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
+ if (!MaxExpr) return false;
+
+ return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
+}
+
+
+/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
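+/// SCEV has no dedicated min expression: smin(A, B) is represented as
+/// ~smax(~A, ~B) (likewise for umin), which is why we match via MatchNotExpr.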
+template<typename MaxExprType>
+static bool IsMinConsistingOf(ScalarEvolution &SE,
+ const SCEV *MaybeMinExpr,
+ const SCEV *Candidate) {
+ const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
+ if (!MaybeMaxExpr)
+ return false;
+
+ return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
+}
+
+static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+
+ // If both sides are affine addrecs for the same loop, with equal
+ // steps, and we know the recurrences don't wrap, then we only
+ // need to check the predicate on the starting values.
+
+ if (!ICmpInst::isRelational(Pred))
+ return false;
+
+ const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+ if (!LAR)
+ return false;
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+ if (!RAR)
+ return false;
+ if (LAR->getLoop() != RAR->getLoop())
+ return false;
+ if (!LAR->isAffine() || !RAR->isAffine())
+ return false;
+
+ if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
+ return false;
+
+ SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
+ SCEV::FlagNSW : SCEV::FlagNUW;
+ if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
+ return false;
+
+ return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
+}
+
+/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
+/// expression?
+static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ switch (Pred) {
+ default:
+ return false;
+
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SLE:
+ return
+ // min(A, ...) <= A
+ IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_ULE:
+ return
+ // min(A, ...) <= A
+ IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+ }
+
+ llvm_unreachable("covered switch fell through?!");
+}
+
+/// isImpliedCondOperandsHelper - Test whether the condition described by
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
+/// FoundLHS, and FoundRHS is true.
+bool
+ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ auto IsKnownPredicateFull =
+ [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
+ return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+ IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+ isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
+ };
+
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ return true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
+/// Tries to get cases like "X `sgt` 0 => X - 1 `sgt` -1".
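+/// For instance, from "X s> 0" we know X is in [1, INT_MIN); adding the -1
+/// from "LHS == X + (-1)" shifts this to [0, INT_MIN - 1), and every value
+/// in that range satisfies "LHS s> -1".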
+bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
+ // The restriction on `FoundRHS` can be lifted easily -- it exists only to
+ // reduce the compile time impact of this optimization.
+ return false;
+
+ const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
+ if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
+ !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ return false;
+
+ APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
+
+ // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
+ // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+ ConstantRange FoundLHSRange =
+ ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
+
+ // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
+ // for `LHS`:
+ APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+
+ // We can also compute the range of values for `LHS` that satisfy the
+ // consequent, "`LHS` `Pred` `RHS`":
+ APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
+ ConstantRange SatisfyingLHSRange =
+ ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
+
+ // The antecedent implies the consequent if every value of `LHS` that
+ // satisfies the antecedent also satisfies the consequent.
+ return SatisfyingLHSRange.contains(LHSRange);
+}
+
+// Verify whether a linear IV with a positive stride can overflow in a
+// less-than comparison, given the invariant term of the comparison, the
+// stride, and the NSW/NUW flags on the recurrence.
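+//
+// For example, in i8 with IsSigned set and no wrap flags: if RHS may be as
+// large as 120 and the stride as large as 16, the IV may still be 119 just
+// before exiting, and 119 + 16 overflows INT8_MAX; since 120 + 15 > 127 we
+// report that overflow is possible.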
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getOne(Stride->getType());
+
+ if (IsSigned) {
+ APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+ }
+
+ APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxValue = APInt::getMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
+
+// Verify whether a linear IV with a negative stride can overflow in a
+// greater-than comparison, given the invariant term of the comparison,
+// the stride, and the NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getOne(Stride->getType());
+
+ if (IsSigned) {
+ APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
+ }
+
+ APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinValue = APInt::getMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count knowing the interval difference, the
+// stride and presence of the equality in the comparison.
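+//
+// For example, with Delta = 8 and Step = 2: a strict exit test takes
+// (8 + (2 - 1)) /u 2 = 4 backedges, while a test that includes equality
+// takes one more, (8 + 2) /u 2 = 5.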
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+ bool Equality) {
+ const SCEV *One = getOne(Step->getType());
+ Delta = Equality ? getAddExpr(Delta, Step)
+ : getAddExpr(Delta, getMinusSCEV(Step, One));
+ return getUDivExpr(Delta, Step);
+}
+
+/// HowManyLessThans - Return the number of times a backedge containing the
+/// specified less-than comparison will execute. If not computable, return
+/// CouldNotCompute.
+///
+/// @param ControlsExit is true when the LHS < RHS condition directly controls
+/// the branch (loops exits only if condition is true). In this case, we can use
+/// NoWrapFlags to skip overflow checks.
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool ControlsExit) {
+ // We handle only IV < Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+ // Bail out unless LHS is an affine AddRec of this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = ControlsExit &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = IV->getStepRecurrence(*this);
+
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+ // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+ // of undefined overflow behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT;
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+ // If we have NoWrap set, then we can assume that the increment won't
+ // overflow, in which case if RHS - Start is a constant, we don't need to
+ // do a max operation since we can just figure it out statically
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+ APInt D = cast<SCEVConstant>(Diff)->getAPInt();
+ if (D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+ }
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+ : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+ // Although End can be a MAX expression we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+ const SCEV *MaxBECount;
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool ControlsExit) {
+ // We handle only IV > Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+ // Bail out unless LHS is an affine AddRec of this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = ControlsExit &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+ // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+ // of undefined overflow behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT;
+
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+ // If we have NoWrap set, then we can assume that the increment won't
+ // overflow, in which case if RHS - Start is a constant, we don't need to
+ // do a max operation since we can just figure it out statically
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+ APInt D = cast<SCEVConstant>(Diff)->getAPInt();
+ if (!D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+ }
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+ APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+ : getUnsignedRange(Start).getUnsignedMax();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+ : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+ // Although End can be a MIN expression we estimate MinEnd considering only
+ // the case End = RHS. This is safe because in the other case (Start - End)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MinEnd =
+ IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+ : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not
+/// in the range, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ Operands[0] = SE.getZero(SC->getType());
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
+ getNoWrapFlags(FlagNW));
+ if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getAPInt()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
+ return SE.getCouldNotCompute();
+
+ // Okay at this point we know that all elements of the chrec are constants and
+ // that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getZero(getType());
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+ // We know that zero is in the range. If A is positive then we know that
+ // the upper value of the range must be the first possible exit value.
+ // If A is negative then the lower of the range is the last possible loop
+ // value. Also note that we already checked for a full range.
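+ //
+ // For example, solving {0,+,4} in [0, 10): A == 4, so End == 9 and
+ // ExitVal == (9 + 4) /u 4 == 3; iteration 3 yields 12, the first value
+ // outside the range, while iteration 2 yields 8, still inside.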
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
+ assert(Range.contains(
+ EvaluateConstantChrecAtConstant(this,
+ ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
+ "Linear scev computation is off in a bad way!");
+ return SE.getConstant(ExitValue);
+ } else if (isQuadratic()) {
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
+ // quadratic equation to solve it. To do this, we must frame our problem in
+ // terms of figuring out when zero is crossed, instead of when
+ // Range.getUpper() is crossed.
+ SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
+ NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
+ // getNoWrapFlags(FlagNW)
+ FlagAnyWrap);
+
+ // Next, solve the constructed addrec
+ auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+ const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
+ const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
+ if (R1) {
+ // Pick the smallest positive root value.
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
+ ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
+ if (!CB->getZExtValue())
+ std::swap(R1, R2); // R1 is the minimum root now.
+
+ // Make sure the root is not off by one. The returned iteration should
+ // not be in the range, but the previous one should be. When solving
+ // for "X*X < 5", for example, we should not return a root of 2.
+ ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
+ R1->getValue(),
+ SE);
+ if (Range.contains(R1Val->getValue())) {
+ // The next iteration must be out of the range...
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
+
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (!Range.contains(R1Val->getValue()))
+ return SE.getConstant(NextVal);
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+
+ // If R1 was not in the range, then it is a good return value. Make
+ // sure that R1-1 WAS in the range though, just in case.
+ ConstantInt *NextVal =
+ ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
+ R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
+ if (Range.contains(R1Val->getValue()))
+ return R1;
+ return SE.getCouldNotCompute(); // Something strange happened
+ }
+ }
+ }
+
+ return SE.getCouldNotCompute();
+}
+
+namespace {
+struct FindUndefs {
+ bool Found;
+ FindUndefs() : Found(false) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
+ if (isa<UndefValue>(C->getValue()))
+ Found = true;
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (isa<UndefValue>(C->getValue()))
+ Found = true;
+ }
+
+ // Keep looking if we haven't found it yet.
+ return !Found;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found an undef.
+ return Found;
+ }
+};
+}
+
+// Return true when S contains at least one undef value.
+static inline bool
+containsUndefs(const SCEV *S) {
+ FindUndefs F;
+ SCEVTraversal<FindUndefs> ST(F);
+ ST.visitAll(S);
+
+ return F.Found;
+}
+
+namespace {
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+ ScalarEvolution &SE;
+ SmallVectorImpl<const SCEV *> &Strides;
+
+ SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+ : SE(SE), Strides(S) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ Strides.push_back(AR->getStepRecurrence(SE));
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+ SmallVectorImpl<const SCEV *> &Terms;
+
+ SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
+ : Terms(T) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+ if (!containsUndefs(S))
+ Terms.push_back(S);
+
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+ bool &ContainsAddRec;
+
+ SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+ ContainsAddRec = false;
+ }
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVAddRecExpr>(S)) {
+ ContainsAddRec = true;
+
+ // Stop recursion: we found an AddRec, no need to walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
+// parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that are
+// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+ SmallVectorImpl<const SCEV *> &Terms;
+ ScalarEvolution &SE;
+
+ SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
+ : Terms(T), SE(SE) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ bool HasAddRec = false;
+ SmallVector<const SCEV *, 0> Operands;
+ for (auto Op : Mul->operands()) {
+ if (isa<SCEVUnknown>(Op)) {
+ Operands.push_back(Op);
+ } else {
+ bool ContainsAddRec;
+ SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
+ visitAll(Op, ContainsAddRecVisitor);
+ HasAddRec |= ContainsAddRec;
+ }
+ }
+ if (Operands.empty())
+ return true;
+
+ if (!HasAddRec)
+ return false;
+
+ Terms.push_back(SE.getMulExpr(Operands));
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+}
+
+/// Find parametric terms in this SCEVAddRecExpr. We look for parameters in
+/// two places:
+/// 1) The strides of AddRec expressions.
+/// 2) Unknowns that are multiplied with AddRec expressions.
+void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Terms) {
+ SmallVector<const SCEV *, 4> Strides;
+ SCEVCollectStrides StrideCollector(*this, Strides);
+ visitAll(Expr, StrideCollector);
+
+ DEBUG({
+ dbgs() << "Strides:\n";
+ for (const SCEV *S : Strides)
+ dbgs() << *S << "\n";
+ });
+
+ for (const SCEV *S : Strides) {
+ SCEVCollectTerms TermCollector(Terms);
+ visitAll(S, TermCollector);
+ }
+
+ DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+ visitAll(Expr, MulCollector);
+}
+
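+// Compute array dimensions by repeated division: treat the last (smallest)
+// term as the stride of the innermost dimension, divide all other terms by
+// it, and recurse on the quotients. For example, Terms == {%m * %o, %o}
+// yields Sizes == {%m, %o}.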
+static bool findArrayDimensionsRec(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ int Last = Terms.size() - 1;
+ const SCEV *Step = Terms[Last];
+
+ // End of recursion.
+ if (Last == 0) {
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
+ SmallVector<const SCEV *, 2> Qs;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Qs.push_back(Op);
+
+ Step = SE.getMulExpr(Qs);
+ }
+
+ Sizes.push_back(Step);
+ return true;
+ }
+
+ for (const SCEV *&Term : Terms) {
+ // Normalize the terms before the next call to findArrayDimensionsRec.
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, Step, &Q, &R);
+
+ // Bail out when GCD does not evenly divide one of the terms.
+ if (!R->isZero())
+ return false;
+
+ Term = Q;
+ }
+
+ // Remove all SCEVConstants.
+ Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
+ return isa<SCEVConstant>(E);
+ }),
+ Terms.end());
+
+ if (!Terms.empty())
+ if (!findArrayDimensionsRec(SE, Terms, Sizes))
+ return false;
+
+ Sizes.push_back(Step);
+ return true;
+}
+
+// Returns true when S contains at least one SCEVUnknown parameter.
+static inline bool
+containsParameters(const SCEV *S) {
+ struct FindParameter {
+ bool FoundParameter;
+ FindParameter() : FoundParameter(false) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S)) {
+ FoundParameter = true;
+ // Stop recursion: we found a parameter.
+ return false;
+ }
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found a parameter.
+ return FoundParameter;
+ }
+ };
+
+ FindParameter F;
+ SCEVTraversal<FindParameter> ST(F);
+ ST.visitAll(S);
+
+ return F.FoundParameter;
+}
+
+// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
+static inline bool
+containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+ for (const SCEV *T : Terms)
+ if (containsParameters(T))
+ return true;
+ return false;
+}
+
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+ if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+ return Expr->getNumOperands();
+ return 1;
+}
+
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+ if (isa<SCEVConstant>(T))
+ return nullptr;
+
+ if (isa<SCEVUnknown>(T))
+ return T;
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+ SmallVector<const SCEV *, 2> Factors;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Factors.push_back(Op);
+
+ return SE.getMulExpr(Factors);
+ }
+
+ return T;
+}
+
+/// Return the size of an element read or written by Inst.
+const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
+ Type *Ty;
+ if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+ Ty = Store->getValueOperand()->getType();
+ else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+ Ty = Load->getType();
+ else
+ return nullptr;
+
+ Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
+ return getSizeOfExpr(ETy, Ty);
+}
+
+/// Second step of delinearization: compute the array dimensions Sizes from the
+/// set of Terms extracted from the memory access function of this SCEVAddRec.
+void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) const {
+
+ if (Terms.empty() || !ElementSize)
+ return;
+
+ // Early return when Terms do not contain parameters: we do not delinearize
+ // non parametric SCEVs.
+ if (!containsParameters(Terms))
+ return;
+
+ DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ // Remove duplicates.
+ std::sort(Terms.begin(), Terms.end());
+ Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
+
+ // Put larger terms first.
+ std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ return numberOfTerms(LHS) > numberOfTerms(RHS);
+ });
+
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ // Try to divide all terms by the element size. If a term is not divisible
+ // by the element size, proceed with the original term.
+ for (const SCEV *&Term : Terms) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ if (!Q->isZero())
+ Term = Q;
+ }
+
+ SmallVector<const SCEV *, 4> NewTerms;
+
+ // Remove constant factors.
+ for (const SCEV *T : Terms)
+ if (const SCEV *NewT = removeConstantFactors(SE, T))
+ NewTerms.push_back(NewT);
+
+ DEBUG({
+ dbgs() << "Terms after sorting:\n";
+ for (const SCEV *T : NewTerms)
+ dbgs() << *T << "\n";
+ });
+
+ if (NewTerms.empty() ||
+ !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+ Sizes.clear();
+ return;
+ }
+
+ // The last element to be pushed into Sizes is the size of an element.
+ Sizes.push_back(ElementSize);
+
+ DEBUG({
+ dbgs() << "Sizes:\n";
+ for (const SCEV *S : Sizes)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Third step of delinearization: compute the access functions for the
+/// Subscripts based on the dimensions in Sizes.
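+/// For example, with Expr == {{0,+,(8 * %m)}<%i>,+,8}<%j> (base pointer
+/// already subtracted) and Sizes == {%m, 8}, dividing by 8 and then by %m
+/// recovers the access functions {0,+,1}<%i> and {0,+,1}<%j>.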
+void ScalarEvolution::computeAccessFunctions(
+ const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (Sizes.empty())
+ return;
+
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
+ if (!AR->isAffine())
+ return;
+
+ const SCEV *Res = Expr;
+ int Last = Sizes.size() - 1;
+ for (int i = Last; i >= 0; i--) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
+
+ DEBUG({
+ dbgs() << "Res: " << *Res << "\n";
+ dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+ dbgs() << "Res divided by Sizes[i]:\n";
+ dbgs() << "Quotient: " << *Q << "\n";
+ dbgs() << "Remainder: " << *R << "\n";
+ });
+
+ Res = Q;
+
+ // Do not record the last subscript corresponding to the size of elements in
+ // the array.
+ if (i == Last) {
+
+ // Bail out if the remainder is too complex.
+ if (isa<SCEVAddRecExpr>(R)) {
+ Subscripts.clear();
+ Sizes.clear();
+ return;
+ }
+
+ continue;
+ }
+
+ // Record the access function for the current subscript.
+ Subscripts.push_back(R);
+ }
+
+ // Also push in last position the remainder of the last division: it will be
+ // the access function of the innermost dimension.
+ Subscripts.push_back(Res);
+
+ std::reverse(Subscripts.begin(), Subscripts.end());
+
+ DEBUG({
+ dbgs() << "Subscripts:\n";
+ for (const SCEV *S : Subscripts)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. The remainder of the delinearization is the
+/// offset start of the array. The SCEV->delinearize algorithm computes the
+/// multiples of SCEV coefficients: that is, a pattern matching of
+/// subexpressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it leaves Subscripts and Sizes empty.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identifying the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the known
+/// dimensions: %m * %o * 8.
+///
+/// Finally delinearize provides the access functions for the array reference
+/// that does correspond to A[i][j][k] of the above C testcase:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+void ScalarEvolution::delinearize(const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) {
+ // First step: collect parametric terms.
+ SmallVector<const SCEV *, 4> Terms;
+ collectParametricTerms(Expr, Terms);
+
+ if (Terms.empty())
+ return;
+
+ // Second step: find subscript sizes.
+ findArrayDimensions(Terms, Sizes, ElementSize);
+
+ if (Sizes.empty())
+ return;
+
+ // Third step: compute the access functions for each subscript.
+ computeAccessFunctions(Expr, Subscripts, Sizes);
+
+ if (Subscripts.empty())
+ return;
+
+ DEBUG({
+ dbgs() << "succeeded to delinearize " << *Expr << "\n";
+ dbgs() << "ArrayDecl[UnknownSize]";
+ for (const SCEV *S : Sizes)
+ dbgs() << "[" << *S << "]";
+
+ dbgs() << "\nArrayRef";
+ for (const SCEV *S : Subscripts)
+ dbgs() << "[" << *S << "]";
+ dbgs() << "\n";
+ });
+}
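+
+// Illustrative sketch of a caller (Inst, GEP, and L are hypothetical values
+// taken from a visited memory instruction, as in the DelinearizationPass
+// mentioned above):
+//
+//   const SCEV *AccessFn = SE.getSCEVAtScope(SE.getSCEV(GEP), L);
+//   SmallVector<const SCEV *, 3> Subscripts, Sizes;
+//   SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(Inst));
+//   if (Subscripts.empty() || Sizes.empty())
+//     ; // Delinearization failed; fall back to the linearized expression.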
+
+//===----------------------------------------------------------------------===//
+// SCEVCallbackVH Class Implementation
+//===----------------------------------------------------------------------===//
+
+void ScalarEvolution::SCEVCallbackVH::deleted() {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+ if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(getValPtr());
+ // this now dangles!
+}
+
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
+ assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
+
+ // Forget all the expressions associated with users of the old value,
+ // so that future queries will recompute the expressions using the new
+ // value.
+ Value *Old = getValPtr();
+ SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
+ SmallPtrSet<User *, 8> Visited;
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
+ // Deleting the Old value will cause this to dangle. Postpone
+ // that until everything else is done.
+ if (U == Old)
+ continue;
+ if (!Visited.insert(U).second)
+ continue;
+ if (PHINode *PN = dyn_cast<PHINode>(U))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(U);
+ Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
+ }
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
+}
+
+ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
+ : CallbackVH(V), SE(se) {}
+
+//===----------------------------------------------------------------------===//
+// ScalarEvolution Class Implementation
+//===----------------------------------------------------------------------===//
+
+ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
+ AssumptionCache &AC, DominatorTree &DT,
+ LoopInfo &LI)
+ : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
+ CouldNotCompute(new SCEVCouldNotCompute()),
+ WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
+ FirstUnknown(nullptr) {}
+
+ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
+ : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
+ CouldNotCompute(std::move(Arg.CouldNotCompute)),
+ ValueExprMap(std::move(Arg.ValueExprMap)),
+ WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+ ConstantEvolutionLoopExitValue(
+ std::move(Arg.ConstantEvolutionLoopExitValue)),
+ ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+ LoopDispositions(std::move(Arg.LoopDispositions)),
+ BlockDispositions(std::move(Arg.BlockDispositions)),
+ UnsignedRanges(std::move(Arg.UnsignedRanges)),
+ SignedRanges(std::move(Arg.SignedRanges)),
+ UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
+ UniquePreds(std::move(Arg.UniquePreds)),
+ SCEVAllocator(std::move(Arg.SCEVAllocator)),
+ FirstUnknown(Arg.FirstUnknown) {
+ Arg.FirstUnknown = nullptr;
+}
+
+ScalarEvolution::~ScalarEvolution() {
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U;) {
+ SCEVUnknown *Tmp = U;
+ U = U->Next;
+ Tmp->~SCEVUnknown();
+ }
+ FirstUnknown = nullptr;
+
+ ValueExprMap.clear();
+
+ // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+ // that a loop had multiple computable exits.
+ for (auto &BTCI : BackedgeTakenCounts)
+ BTCI.second.clear();
+
+ assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
+ assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
+}
+
+bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
+ return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
+}
+
+static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
+ const Loop *L) {
+ // Print all inner loops first
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ PrintLoopInfo(OS, SE, *I);
+
+ OS << "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ OS << "<multiple exits> ";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable backedge-taken count. ";
+ }
+
+ OS << "\n"
+ "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+
+ if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ } else {
+ OS << "Unpredictable max backedge-taken count. ";
+ }
+
+ OS << "\n";
+}
+
+void ScalarEvolution::print(raw_ostream &OS) const {
+ // ScalarEvolution's implementation of the print method prints out the
+ // SCEV values of all interesting instructions. Doing this can create new
+ // SCEV objects, which technically conflicts with the const qualifier.
+ // This isn't observable from outside the class, however, so casting away
+ // the const isn't dangerous.
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ OS << "Classifying expressions for: ";
+ F.printAsOperand(OS, /*PrintType=*/false);
+ OS << "\n";
+ for (Instruction &I : instructions(F))
+ if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+ OS << I << '\n';
+ OS << " --> ";
+ const SCEV *SV = SE.getSCEV(&I);
+ SV->print(OS);
+ if (!isa<SCEVCouldNotCompute>(SV)) {
+ OS << " U: ";
+ SE.getUnsignedRange(SV).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(SV).print(OS);
+ }
+
+ const Loop *L = LI.getLoopFor(I.getParent());
+
+ const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ if (!isa<SCEVCouldNotCompute>(AtUse)) {
+ OS << " U: ";
+ SE.getUnsignedRange(AtUse).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(AtUse).print(OS);
+ }
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
+ if (!SE.isLoopInvariant(ExitValue, L)) {
+ OS << "<<Unknown>>";
+ } else {
+ OS << *ExitValue;
+ }
+ }
+
+ OS << "\n";
+ }
+
+ OS << "Determining loop execution counts for: ";
+ F.printAsOperand(OS, /*PrintType=*/false);
+ OS << "\n";
+ for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
+ PrintLoopInfo(OS, &SE, *I);
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+ auto &Values = LoopDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == L)
+ return V.getInt();
+ }
+ Values.emplace_back(L, LoopVariant);
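+ // The tentative LoopVariant entry just added terminates recursive queries
+ // for the same (S, L) pair. Note that computeLoopDisposition below may
+ // itself call getLoopDisposition and grow LoopDispositions, invalidating
+ // the Values reference above; that is why the map is looked up again before
+ // the entry is updated with the computed disposition.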
+ LoopDisposition D = computeLoopDisposition(S, L);
+ auto &Values2 = LoopDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == L) {
+ V.setInt(D);
+ break;
+ }
+ }
+ return D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return LoopInvariant;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+ // If L is the addrec's loop, it's computable.
+ if (AR->getLoop() == L)
+ return LoopComputable;
+
+ // Add recurrences are never invariant in the function-body (null loop).
+ if (!L)
+ return LoopVariant;
+
+ // This recurrence is variant w.r.t. L if L contains AR's loop.
+ if (L->contains(AR->getLoop()))
+ return LoopVariant;
+
+ // This recurrence is invariant w.r.t. L if AR's loop contains L.
+ if (AR->getLoop()->contains(L))
+ return LoopInvariant;
+
+ // This recurrence is variant w.r.t. L if any of its operands
+ // are variant.
+ for (auto *Op : AR->operands())
+ if (!isLoopInvariant(Op, L))
+ return LoopVariant;
+
+ // Otherwise it's loop-invariant.
+ return LoopInvariant;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ bool HasVarying = false;
+ for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+ LoopDisposition D = getLoopDisposition(Op, L);
+ if (D == LoopVariant)
+ return LoopVariant;
+ if (D == LoopComputable)
+ HasVarying = true;
+ }
+ return HasVarying ? LoopComputable : LoopInvariant;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+ if (LD == LoopVariant)
+ return LoopVariant;
+ LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+ if (RD == LoopVariant)
+ return LoopVariant;
+ return (LD == LoopInvariant && RD == LoopInvariant) ?
+ LoopInvariant : LoopComputable;
+ }
+ case scUnknown:
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+ return LoopInvariant;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
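+
+// Worked example (illustrative, assuming %for.i encloses %for.j): the addrec
+// {0,+,1}<%for.i> is LoopComputable w.r.t. %for.i (its own loop),
+// LoopInvariant w.r.t. %for.j (a loop nested inside %for.i, so the value is
+// fixed across %for.j's iterations), and LoopVariant w.r.t. any loop that
+// contains %for.i, as well as w.r.t. the null (whole-function) scope.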
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopComputable;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ auto &Values = BlockDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == BB)
+ return V.getInt();
+ }
+ Values.emplace_back(BB, DoesNotDominateBlock);
+ BlockDisposition D = computeBlockDisposition(S, BB);
+ auto &Values2 = BlockDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == BB) {
+ V.setInt(D);
+ break;
+ }
+ }
+ return D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return ProperlyDominatesBlock;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+ case scAddRecExpr: {
+ // This uses a "dominates" query instead of a "properly dominates" query
+ // to test for proper dominance too, because the instruction which
+ // produces the addrec's value is a PHI, and a PHI effectively properly
+ // dominates its entire containing block.
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+ if (!DT.dominates(AR->getLoop()->getHeader(), BB))
+ return DoesNotDominateBlock;
+ }
+ // FALL THROUGH into SCEVNAryExpr handling.
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool Proper = true;
+ for (const SCEV *NAryOp : NAry->operands()) {
+ BlockDisposition D = getBlockDisposition(NAryOp, BB);
+ if (D == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ if (D == DominatesBlock)
+ Proper = false;
+ }
+ return Proper ? ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ BlockDisposition LD = getBlockDisposition(LHS, BB);
+ if (LD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ BlockDisposition RD = getBlockDisposition(RHS, BB);
+ if (RD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+ ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUnknown:
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+ if (I->getParent() == BB)
+ return DominatesBlock;
+ if (DT.properlyDominates(I->getParent(), BB))
+ return ProperlyDominatesBlock;
+ return DoesNotDominateBlock;
+ }
+ return ProperlyDominatesBlock;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ // Search for a SCEV expression node within an expression tree.
+ // Implements SCEVTraversal::Visitor.
+ struct SCEVSearch {
+ const SCEV *Node;
+ bool IsFound;
+
+ SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+ bool follow(const SCEV *S) {
+ IsFound |= (S == Node);
+ return !IsFound;
+ }
+ bool isDone() const { return IsFound; }
+ };
+
+ SCEVSearch Search(Op);
+ visitAll(S, Search);
+ return Search.IsFound;
+}
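+
+// Illustrative sketch: the same visitAll/SCEVTraversal machinery supports any
+// structural query. A hypothetical visitor counting addrecs in an expression
+// tree could be written as:
+//
+//   struct AddRecCounter {
+//     unsigned Count = 0;
+//     bool follow(const SCEV *S) {
+//       Count += isa<SCEVAddRecExpr>(S);
+//       return true;                    // keep walking the whole tree
+//     }
+//     bool isDone() const { return false; }
+//   };
+//   AddRecCounter Counter;
+//   visitAll(Expr, Counter);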
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+ BackedgeTakenInfo &BEInfo = I->second;
+ if (BEInfo.hasOperand(S, this)) {
+ BEInfo.clear();
+ BackedgeTakenCounts.erase(I++);
+ } else
+ ++I;
+ }
+}
+
+typedef DenseMap<const Loop *, std::string> VerifyMap;
+
+/// replaceSubString - Replaces all occurrences of From in Str with To.
+static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
+ size_t Pos = 0;
+ while ((Pos = Str.find(From, Pos)) != std::string::npos) {
+ Str.replace(Pos, From.size(), To.data(), To.size());
+ Pos += To.size();
+ }
+}
+
+/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
+static void
+getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
+ std::string &S = Map[L];
+ if (S.empty()) {
+ raw_string_ostream OS(S);
+ SE.getBackedgeTakenCount(L)->print(OS);
+
+ // false and 0 are semantically equivalent. This can happen in dead loops.
+ replaceSubString(OS.str(), "false", "0");
+ // Remove wrap flags; their use in SCEV is highly fragile.
+ // FIXME: Remove this when SCEV gets smarter about them.
+ replaceSubString(OS.str(), "<nw>", "");
+ replaceSubString(OS.str(), "<nsw>", "");
+ replaceSubString(OS.str(), "<nuw>", "");
+ }
+
+ for (auto *R : reverse(*L))
+ getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
+}
+
+void ScalarEvolution::verify() const {
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ // Gather stringified backedge taken counts for all loops using SCEV's caches.
+ // FIXME: It would be much better to store actual values instead of strings,
+ // but SCEV pointers will change if we drop the caches.
+ VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
+ for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+
+ // Gather stringified backedge taken counts for all loops using a fresh
+ // ScalarEvolution object.
+ ScalarEvolution SE2(F, TLI, AC, DT, LI);
+ for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
+
+ // Now compare whether they're the same with and without caches. This allows
+ // verifying that no pass changed the cache.
+ assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
+ "New loops suddenly appeared!");
+
+ for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
+ OldE = BackedgeDumpsOld.end(),
+ NewI = BackedgeDumpsNew.begin();
+ OldI != OldE; ++OldI, ++NewI) {
+ assert(OldI->first == NewI->first && "Loop order changed!");
+
+ // Compare the stringified SCEVs. We don't care if an undef backedge-taken
+ // count changes.
+ // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. Such a
+ // change either means that a pass is buggy or that SCEV has to learn a new
+ // pattern, but it is usually not harmful.
+ if (OldI->second != NewI->second &&
+ OldI->second.find("undef") == std::string::npos &&
+ NewI->second.find("undef") == std::string::npos &&
+ OldI->second != "***COULDNOTCOMPUTE***" &&
+ NewI->second != "***COULDNOTCOMPUTE***") {
+ dbgs() << "SCEVValidator: SCEV for loop '"
+ << OldI->first->getHeader()->getName()
+ << "' changed from '" << OldI->second
+ << "' to '" << NewI->second << "'!\n";
+ std::abort();
+ }
+ }
+
+ // TODO: Verify more things.
+}
+
+char ScalarEvolutionAnalysis::PassID;
+
+ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F),
+ AM->getResult<DominatorTreeAnalysis>(F),
+ AM->getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
+ AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+char ScalarEvolutionWrapperPass::ID = 0;
+
+ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
+ initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
+ SE.reset(new ScalarEvolution(
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
+ return false;
+}
+
+void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
+
+void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
+ SE->print(OS);
+}
+
+void ScalarEvolutionWrapperPass::verifyAnalysis() const {
+ if (!VerifySCEV)
+ return;
+
+ SE->verify();
+}
+
+void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<AssumptionCacheTracker>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}
+
+const SCEVPredicate *
+ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
+ const SCEVConstant *RHS) {
+ FoldingSetNodeID ID;
+ // Unique this node based on the arguments
+ ID.AddInteger(SCEVPredicate::P_Equal);
+ ID.AddPointer(LHS);
+ ID.AddPointer(RHS);
+ void *IP = nullptr;
+ if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
+ return S;
+ SCEVEqualPredicate *Eq = new (SCEVAllocator)
+ SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+ UniquePreds.InsertNode(Eq, IP);
+ return Eq;
+}
+
+namespace {
+class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
+public:
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+ SCEVUnionPredicate &A) {
+ SCEVPredicateRewriter Rewriter(SE, A);
+ return Rewriter.visit(Scev);
+ }
+
+ SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P)
+ : SCEVRewriteVisitor(SE), P(P) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ auto ExprPreds = P.getPredicatesForExpr(Expr);
+ for (auto *Pred : ExprPreds)
+ if (const auto *IPred = dyn_cast<const SCEVEqualPredicate>(Pred))
+ if (IPred->getLHS() == Expr)
+ return IPred->getRHS();
+
+ return Expr;
+ }
+
+private:
+ SCEVUnionPredicate &P;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev,
+ SCEVUnionPredicate &Preds) {
+ return SCEVPredicateRewriter::rewrite(Scev, *this, Preds);
+}
+
+/// SCEV predicates
+SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
+ SCEVPredicateKind Kind)
+ : FastID(ID), Kind(Kind) {}
+
+SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
+ const SCEVUnknown *LHS,
+ const SCEVConstant *RHS)
+ : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+
+bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
+ const auto *Op = dyn_cast<const SCEVEqualPredicate>(N);
+
+ if (!Op)
+ return false;
+
+ return Op->LHS == LHS && Op->RHS == RHS;
+}
+
+bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+
+const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+
+void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
+ OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+}
+
+/// Union predicates don't get cached, so create a dummy set ID for them.
+SCEVUnionPredicate::SCEVUnionPredicate()
+ : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+
+bool SCEVUnionPredicate::isAlwaysTrue() const {
+ return all_of(Preds,
+ [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
+}
+
+ArrayRef<const SCEVPredicate *>
+SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
+ auto I = SCEVToPreds.find(Expr);
+ if (I == SCEVToPreds.end())
+ return ArrayRef<const SCEVPredicate *>();
+ return I->second;
+}
+
+bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
+ if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N))
+ return all_of(Set->Preds,
+ [this](const SCEVPredicate *I) { return this->implies(I); });
+
+ auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
+ if (ScevPredsIt == SCEVToPreds.end())
+ return false;
+ auto &SCEVPreds = ScevPredsIt->second;
+
+ return any_of(SCEVPreds,
+ [N](const SCEVPredicate *I) { return I->implies(N); });
+}
+
+const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
+
+void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
+ for (auto Pred : Preds)
+ Pred->print(OS, Depth);
+}
+
+void SCEVUnionPredicate::add(const SCEVPredicate *N) {
+ if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) {
+ for (auto Pred : Set->Preds)
+ add(Pred);
+ return;
+ }
+
+ if (implies(N))
+ return;
+
+ const SCEV *Key = N->getExpr();
+ assert(Key && "Only SCEVUnionPredicate doesn't have an "
+ "associated expression!");
+
+ SCEVToPreds[Key].push_back(N);
+ Preds.push_back(N);
+}
+
+PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE)
+ : SE(SE), Generation(0) {}
+
+const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
+ const SCEV *Expr = SE.getSCEV(V);
+ RewriteEntry &Entry = RewriteMap[Expr];
+
+ // If we already have an entry and the version matches, return it.
+ if (Entry.second && Generation == Entry.first)
+ return Entry.second;
+
+ // We found an entry but it's stale. Rewrite the stale entry
+ // according to the current predicate.
+ if (Entry.second)
+ Expr = Entry.second;
+
+ const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds);
+ Entry = {Generation, NewSCEV};
+
+ return NewSCEV;
+}
+
+void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
+ if (Preds.implies(&Pred))
+ return;
+ Preds.add(&Pred);
+ updateGeneration();
+}
+
+const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
+ return Preds;
+}
+
+void PredicatedScalarEvolution::updateGeneration() {
+ // If the generation number wrapped, recompute everything.
+ if (++Generation == 0) {
+ for (auto &II : RewriteMap) {
+ const SCEV *Rewritten = II.second.second;
+ II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)};
+ }
+ }
+}
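+
+// Illustrative sketch of the intended client flow (names such as Ptr, U, and
+// C are hypothetical): predicates are accumulated while analyzing a loop, and
+// getSCEV re-applies the predicate set current at query time.
+//
+//   PredicatedScalarEvolution PSE(SE);
+//   const SCEV *S0 = PSE.getSCEV(Ptr);             // no predicates yet
+//   PSE.addPredicate(*SE.getEqualPredicate(U, C)); // assume U == C
+//   const SCEV *S1 = PSE.getSCEV(Ptr);             // rewritten under U == C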
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
new file mode 100644
index 0000000..2e50c80
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -0,0 +1,148 @@
+//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
+// simple alias analysis in terms of ScalarEvolution queries.
+//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependencies between different iterations.
+//
+// ScalarEvolution has a more complete understanding of pointer arithmetic
+// than BasicAliasAnalysis' collection of ad-hoc analyses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+using namespace llvm;
+
+AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (LocA.Size == 0 || LocB.Size == 0)
+ return NoAlias;
+
+ // This is SCEVAAResult. Get the SCEVs!
+ const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS)
+ return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE.getEffectiveSCEVType(AS->getType()) ==
+ SE.getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
+ APInt ASizeInt(BitWidth, LocA.Size);
+ APInt BSizeInt(BitWidth, LocB.Size);
+
+ // Compute the difference between the two pointers.
+ const SCEV *BA = SE.getMinusSCEV(BS, AS);
+
+ // Test whether the difference is known to be large enough that memory of
+ // the given sizes doesn't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE.getMinusSCEV(AS, BS);
+
+ // Test whether the difference is known to be large enough that memory of
+ // the given sizes doesn't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax()))
+ return NoAlias;
+ }
+
+ // If ScalarEvolution can find an underlying object, form a new query.
+ // The correctness of this depends on ScalarEvolution not recognizing
+ // inttoptr and ptrtoint operators.
+ Value *AO = GetBaseValue(AS);
+ Value *BO = GetBaseValue(BS);
+ if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+ if (alias(MemoryLocation(AO ? AO : LocA.Ptr,
+ AO ? +MemoryLocation::UnknownSize : LocA.Size,
+ AO ? AAMDNodes() : LocA.AATags),
+ MemoryLocation(BO ? BO : LocB.Ptr,
+ BO ? +MemoryLocation::UnknownSize : LocB.Size,
+ BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
+ return NoAlias;
+
+ // Forward the query to the next analysis.
+ return AAResultBase::alias(LocA, LocB);
+}
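+
+// Worked example (illustrative): if AS = %p and BS = (16 + %p), then
+// BA = 16 with the singleton unsigned range [16, 17). With LocA.Size = 8 and
+// LocB.Size = 8, the test reads 8 ule 16 and (-8) uge 16, where -8 is the
+// huge unsigned value 2^BitWidth - 8; both hold, so [%p, %p+8) and
+// [%p+16, %p+24) cannot overlap and the result is NoAlias.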
+
+/// Given an expression, try to find a base value.
+///
+/// Returns null if none was found.
+Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands() - 1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+ // No identified object found.
+ return nullptr;
+}
+
+SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F),
+ AM->getResult<ScalarEvolutionAnalysis>(F));
+}
+
+char SCEVAA::PassID;
+
+char SCEVAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true)
+
+FunctionPass *llvm::createSCEVAAWrapperPass() {
+ return new SCEVAAWrapperPass();
+}
+
+SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) {
+ initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool SCEVAAWrapperPass::runOnFunction(Function &F) {
+ Result.reset(
+ new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ getAnalysis<ScalarEvolutionWrapperPass>().getSE()));
+ return false;
+}
+
+void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 0000000..921403d
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,2026 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // This function must be called with the builder having a valid insertion
+ // point. It doesn't need to be the actual IP where the uses of the returned
+ // cast will be added, but it must dominate such IP.
+ // We use this precondition to produce a cast that will dominate all its
+ // uses. In particular, this is crucial for the case where the builder's
+ // insertion point *is* the point where we were asked to put the cast.
+ // Since we don't know the builder's insertion point is actually
+ // where the uses will be added (only that it dominates it), we are
+ // not allowed to move it.
+ BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+ Instruction *Ret = nullptr;
+
+ // Check to see if there is already a cast!
+ for (User *U : V->users())
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, create a new cast at IP.
+ // Likewise, do not reuse a cast at BIP because it must dominate
+ // instructions that might be inserted before BIP.
+ if (BasicBlock::iterator(CI) != IP || BIP == IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point. Clear its operand
+ // so that it doesn't hold anything live.
+ Ret = CastInst::Create(Op, V, Ty, "", &*IP);
+ Ret->takeName(CI);
+ CI->replaceAllUsesWith(Ret);
+ CI->setOperand(0, UndefValue::get(V->getType()));
+ break;
+ }
+ Ret = CI;
+ break;
+ }
+
+ // Create a new cast.
+ if (!Ret)
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
+
+ // We assert at the end of the function since IP might point to an
+ // instruction with different dominance properties than a cast
+ // (an invoke for example) and not dominate BIP (but the cast does).
+ assert(SE.DT.dominates(Ret, &*BIP));
+
+ rememberInstruction(Ret);
+ return Ret;
+}
+
+static BasicBlock::iterator findInsertPointAfter(Instruction *I,
+ BasicBlock *MustDominate) {
+ BasicBlock::iterator IP = ++I->getIterator();
+ if (auto *II = dyn_cast<InvokeInst>(I))
+ IP = II->getNormalDest()->begin();
+
+ while (isa<PHINode>(IP))
+ ++IP;
+
+ while (IP->isEHPad()) {
+ if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+ ++IP;
+ } else if (isa<CatchSwitchInst>(IP)) {
+ IP = MustDominate->getFirstInsertionPt();
+ } else {
+ llvm_unreachable("unexpected eh pad!");
+ }
+ }
+
+ return IP;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
+ Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+ assert((Op == Instruction::BitCast ||
+ Op == Instruction::PtrToInt ||
+ Op == Instruction::IntToPtr) &&
+ "InsertNoopCastOfTo cannot perform non-noop casts!");
+ assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+ "InsertNoopCastOfTo cannot change sizes!");
+
+ // Short-circuit unnecessary bitcasts.
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
+ // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+ if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if ((CI->getOpcode() == Instruction::PtrToInt ||
+ CI->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CI->getType()) ==
+ SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+ return CI->getOperand(0);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if ((CE->getOpcode() == Instruction::PtrToInt ||
+ CE->getOpcode() == Instruction::IntToPtr) &&
+ SE.getTypeSizeInBits(CE->getType()) ==
+ SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return CE->getOperand(0);
+ }
+
+ // Fold a cast of a constant.
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Op, C, Ty);
+
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
+ }
+
+ // Cast the instruction immediately after the instruction.
+ Instruction *I = cast<Instruction>(V);
+ BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
+ return ReuseOrCreateCast(I, Ty, Op, IP);
+}
+
+/// InsertBinop - Insert the specified binary operator, doing a small amount
+/// of work to avoid inserting an obviously redundant operation.
+Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS) {
+ // Fold a binop with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::get(Opcode, CLHS, CRHS);
+
+ // Do a quick scan to see if we have this binop nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
+ IP->getOperand(1) == RHS)
+ return &*IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ BuilderType::InsertPointGuard Guard(Builder);
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // If we haven't found this binop, insert it.
+ Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
+ BO->setDebugLoc(Loc);
+ rememberInstruction(BO);
+
+ return BO;
+}
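+
+// Illustrative effect: expanding both (%a + %b) and ((%a + %b) + %c) at the
+// same insertion point reuses a single "add %a, %b" instruction instead of
+// emitting a duplicate, provided the existing add is within the
+// six-instruction scan window above.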
+
+/// FactorOutConstant - Test if S is divisible by Factor, using signed
+/// division. If so, update S with Factor divided out and return true.
+/// S need not be evenly divisible if a reasonable remainder can be
+/// computed.
+/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
+/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
+/// check to see if the divide was folded.
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+ const SCEV *Factor, ScalarEvolution &SE,
+ const DataLayout &DL) {
+ // Everything is divisible by one.
+ if (Factor->isOne())
+ return true;
+
+ // x/x == 1.
+ if (S == Factor) {
+ S = SE.getConstant(S->getType(), 1);
+ return true;
+ }
+
+ // For a Constant, check for a multiple of the given factor.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ // 0/x == 0.
+ if (C->isZero())
+ return true;
+ // Check for divisibility.
+ if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
+ ConstantInt *CI =
+ ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
+ // If the quotient is zero and the remainder is non-zero, reject
+ // the value at this scale. It will be considered for subsequent
+ // smaller scales.
+ if (!CI->isZero()) {
+ const SCEV *Div = SE.getConstant(CI);
+ S = Div;
+ Remainder = SE.getAddExpr(
+ Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
+ return true;
+ }
+ }
+ }
+
+ // In a Mul, check if there is a constant operand which is a multiple
+ // of the given factor.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+ // If the leading operand is a constant that is a multiple of the given
+ // factor, we can divide it out.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getAPInt().srem(FC->getAPInt())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
+ }
+ }
+
+ // In an AddRec, check if both start and step are divisible.
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
+ return false;
+ S = SE.getAddRecExpr(Start, Step, A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW));
+ return true;
+ }
+
+ return false;
+}
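+
+// Worked example (illustrative): factoring {24,+,8}<%L> by the constant 8
+// first divides the step (8/8 = 1, zero step remainder) and then the start
+// (24 sdiv 8 = 3, 24 srem 8 = 0), producing {3,+,1}<%L>. With a start of 25
+// the result is still {3,+,1}<%L>, but Remainder is increased by 1
+// (25 srem 8).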
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW)));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
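+
+// Worked example (illustrative): given Ops = [ {(%a + %b),+,%c}<%L> ], the
+// loop above replaces the entry with %a and %b and queues {0,+,%c}<%L>; the
+// addrec is then appended and SimplifyAddOperands re-sorts the list so that
+// addrecs remain grouped at the end.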
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware. If some of the indices are loop-invariant while others
+/// aren't, it might seem desirable to emit multiple GEPs, keeping the
+/// loop-invariant portions of the overall computation outside the loop.
+/// However, there are a few reasons this is not done here. Hoisting simple
+/// arithmetic is a low-level optimization that often isn't very
+/// important until late in the optimization process. In fact, passes
+/// like InstructionCombining will combine GEPs, even if it means
+/// pushing loop-invariant computation down into loops, so even if the
+/// GEPs were split here, the work would quickly be undone. The
+/// LoopStrengthReduction pass, which is usually run quite late (and
+/// after the last InstructionCombining pass), takes care of hoisting
+/// loop-invariant portions of expressions, after considering what
+/// can be folded using target addressing modes.
+///
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+ const SCEV *const *op_end,
+ PointerType *PTy,
+ Type *Ty,
+ Value *V) {
+ Type *OriginalElTy = PTy->getElementType();
+ Type *ElTy = OriginalElTy;
+ SmallVector<Value *, 4> GepIndices;
+ SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+ bool AnyNonZeroIndices = false;
+
+ // Split AddRecs up into parts as either of the parts may be usable
+ // without the other.
+ SplitAddRecs(Ops, Ty, SE);
+
+ Type *IntPtrTy = DL.getIntPtrType(PTy);
+
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (const SCEV *Op : Ops) {
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of operands
+ // we'll scan next iteration.
+ NewOps.push_back(Op);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+ }
+ }
+
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled = ScaledOps.empty() ?
+ Constant::getNullValue(Ty) :
+ expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
+ }
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+ }
+ }
+
+ if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ break;
+ }
+
+ // If none of the operands were convertible to proper GEP indices, cast
+ // the base to i8* and do an ugly getelementptr with that. It's still
+ // better than ptrtoint+arithmetic+inttoptr at least.
+ if (!AnyNonZeroIndices) {
+ // Cast the base to i8*.
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+ assert(!isa<Instruction>(V) ||
+ SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
+
+ // Expand the operands for a plain byte offset.
+ Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
+
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+ CLHS, CRHS);
+
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ return &*IP;
+ if (IP == BlockBegin) break;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BuilderType::InsertPointGuard Guard(Builder);
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // Emit a GEP.
+ Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
+ rememberInstruction(GEP);
+
+ return GEP;
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant =
+ std::any_of(GepIndices.begin(), GepIndices.end(),
+ [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
+
+ // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+ // because ScalarEvolution may have changed the address arithmetic to
+ // compute a value which is beyond the end of the allocated object.
+ Value *Casted = V;
+ if (V->getType() != PTy)
+ Casted = InsertNoopCastOfTo(Casted, PTy);
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
+ Ops.push_back(SE.getUnknown(GEP));
+ rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ Builder.restoreIP(SaveInsertPt);
+
+ return expand(SE.getAddExpr(Ops));
+}
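+
+// Illustrative effect (assuming a double* base %A and an index %i): expanding
+// the SCEV (%A + (8 * %i)) with this function typically produces
+// "getelementptr double, double* %A, i64 %i" instead of
+// ptrtoint/add/inttoptr, because FactorOutConstant divides the byte offset by
+// sizeof(double) and the quotient becomes the array index.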
+
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
+ return nullptr;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return Pair.first->second = SE.LI.getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
+ return nullptr;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = nullptr;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (const SCEV *Op : N->operands())
+ L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result = PickMostRelevantLoop(
+ getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
+ return RelevantLoops[D] = Result;
+ }
+ llvm_unreachable("Unexpected SCEV type!");
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the end.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (LHS.second->isNonConstantNegative()) {
+ if (!RHS.second->isNonConstantNegative())
+ return false;
+ } else if (RHS.second->isNonConstantNegative())
+ return true;
+
+ // Otherwise they are equivalent according to this comparison.
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = nullptr;
+ for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is a SCEVUnknown that is not an instruction, peek
+ // through it to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is an instruction,
+ // wrap it in a SCEVUnknown to avoid re-analyzing it.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (Op->isNonConstantNegative()) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
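+
+// A hedged sketch of the result (names and types assumed): for the add SCEV
+// (%p + %n + 4) with %p of pointer type, the loop above prefers GEPs over
+// ptrtoint/add/inttoptr sequences, typically yielding something like:
+//
+//   %t = getelementptr i8, i8* %p, i64 %n
+//   %s = getelementptr i8, i8* %t, i64 4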
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = nullptr;
+ for (const auto &I : OpsAndLoops) {
+ const SCEV *Op = I.second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ const APInt *RHS;
+ if (match(W, m_Power2(RHS))) {
+ // Canonicalize Prod*(1<<C) to Prod<<C.
+ assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+ Prod = InsertBinop(Instruction::Shl, Prod,
+ ConstantInt::get(Ty, RHS->logBase2()));
+ } else {
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ }
+ }
+ }
+
+ return Prod;
+}
+
+Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ Value *LHS = expandCodeFor(S->getLHS(), Ty);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
+ const APInt &RHS = SC->getAPInt();
+ if (RHS.isPowerOf2())
+ return InsertBinop(Instruction::LShr, LHS,
+ ConstantInt::get(Ty, RHS.logBase2()));
+ }
+
+ Value *RHS = expandCodeFor(S->getRHS(), Ty);
+ return InsertBinop(Instruction::UDiv, LHS, RHS);
+}
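+
+// Minimal example of the fast path above (hypothetical %x): a constant
+// power-of-two divisor becomes a shift,
+//
+//   (%x /u 8)  -->  %q = lshr i64 %x, 3
+//
+// while any other divisor falls through to a plain udiv.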
+
+/// Move parts of Base into Rest to leave Base with the minimal
+/// expression that provides a pointer operand suitable for a
+/// GEP expansion.
+static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
+ ScalarEvolution &SE) {
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
+ Base = A->getStart();
+ Rest = SE.getAddExpr(Rest,
+ SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
+ A->getStepRecurrence(SE),
+ A->getLoop(),
+ A->getNoWrapFlags(SCEV::FlagNW)));
+ }
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
+ Base = A->getOperand(A->getNumOperands()-1);
+ SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
+ NewAddOps.back() = Rest;
+ Rest = SE.getAddExpr(NewAddOps);
+ ExposePointerBase(Base, Rest, SE);
+ }
+}
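+
+// Sketch of one peeling step (assumed example): with Base = {%p,+,4}<%L> and
+// Rest = %n, the loop above rewrites this to Base = %p and
+// Rest = (%n + {0,+,4}<%L>), exposing %p as a candidate GEP base.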
+
+/// Determine if this is a well-behaved chain of instructions leading back to
+/// the PHI. If so, it may be reused by expanded expressions.
+bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+ (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
+ return false;
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT.dominates(OInst, IVIncInsertPos))
+ return false;
+ }
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ return false;
+
+ if (IncV->mayHaveSideEffects())
+ return false;
+
+ if (IncV != PN)
+ return true;
+
+ return isNormalAddRecExprPHI(PN, IncV, L);
+}
+
+/// getIVIncOperand returns an induction variable increment's induction
+/// variable operand.
+///
+/// If allowScale is set, any type of GEP is allowed as long as the nonIV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+ Instruction *InsertPos,
+ bool allowScale) {
+ if (IncV == InsertPos)
+ return nullptr;
+
+ switch (IncV->getOpcode()) {
+ default:
+ return nullptr;
+ // Check for a simple Add/Sub or GEP of a loop invariant step.
+ case Instruction::Add:
+ case Instruction::Sub: {
+ Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+ if (!OInst || SE.DT.dominates(OInst, InsertPos))
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ return nullptr;
+ }
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ case Instruction::GetElementPtr:
+ for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
+ if (isa<Constant>(*I))
+ continue;
+ if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+ if (!SE.DT.dominates(OInst, InsertPos))
+ return nullptr;
+ }
+ if (allowScale) {
+ // Allow any kind of GEP as long as it can be hoisted.
+ continue;
+ }
+ // This must be a pointer addition of constants (pretty), which is already
+ // handled, or some number of address-size elements (ugly). Ugly geps
+ // have 2 operands. i1* is used by the expander to represent an
+ // address-size element.
+ if (IncV->getNumOperands() != 2)
+ return nullptr;
+ unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+ if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+ && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+ return nullptr;
+ break;
+ }
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ }
+}
+
+/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+ if (SE.DT.dominates(IncV, InsertPos))
+ return true;
+
+ // InsertPos must itself dominate IncV so that IncV's new position satisfies
+ // its existing users.
+ if (isa<PHINode>(InsertPos) ||
+ !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
+ return false;
+
+ if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
+ return false;
+
+ // Check that the chain of IV operands leading back to Phi can be hoisted.
+ SmallVector<Instruction*, 4> IVIncs;
+ for (;;) {
+ Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+ if (!Oper)
+ return false;
+ // IncV is safe to hoist.
+ IVIncs.push_back(IncV);
+ IncV = Oper;
+ if (SE.DT.dominates(IncV, InsertPos))
+ break;
+ }
+ for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
+ (*I)->moveBefore(InsertPos);
+ }
+ return true;
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddToGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ for (Instruction *IVOper = IncV;
+ (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+ /*allowScale=*/false));) {
+ if (IVOper == PN)
+ return true;
+ }
+ return false;
+}
+
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
+}
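+
+// Illustration under assumed operands: for an i8* PHI %iv with a constant
+// step of 4, the GEP path above emits roughly
+//
+//   %iv.next = getelementptr i8, i8* %iv, i64 4
+//
+// whereas an integer PHI with useSubtract set emits "sub %iv, %step".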
+
+/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+ Instruction *Pos, PHINode *LoopPhi) {
+ do {
+ if (DT->dominates(InstToHoist, Pos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ InstToHoist->moveBefore(Pos);
+ Pos = InstToHoist;
+ InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+ } while (InstToHoist != LoopPhi);
+}
+
+/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or inversion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+ const SCEVAddRecExpr *Phi,
+ const SCEVAddRecExpr *Requested,
+ bool &InvertStep) {
+ Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+ Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+ if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+ return false;
+
+ // Try truncate it if necessary.
+ Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+ if (!Phi)
+ return false;
+
+ // Check whether truncation will help.
+ if (Phi == Requested) {
+ InvertStep = false;
+ return true;
+ }
+
+ // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+ if (SE.getAddExpr(Requested->getStart(),
+ SE.getNegativeSCEV(Requested)) == Phi) {
+ InvertStep = true;
+ return true;
+ }
+
+ return false;
+}
+
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+ SE.getSignExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+ SE.getZeroExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
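+
+// Both predicates above use the standard widening argument: the increment is
+// no-wrap iff extending after the add gives the same result as adding the
+// extended operands. E.g. (assumed facts): if SE knows AR = {0,+,1}<%L> in i8
+// never reaches 255, then zext(AR + 1) to i16 equals zext(AR) + 1, so the
+// emitted add may be tagged nuw.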
+
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+ const Loop *L,
+ Type *ExpandTy,
+ Type *IntTy,
+ Type *&TruncTy,
+ bool &InvertStep) {
+ assert((!IVIncInsertLoop || IVIncInsertPos) &&
+        "Uninitialized insert position");
+
+ // Reuse a previously-inserted PHI, if present.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ PHINode *AddRecPhiMatch = nullptr;
+ Instruction *IncV = nullptr;
+ TruncTy = nullptr;
+ InvertStep = false;
+
+ // Only try partially matching SCEVs that need truncation and/or
+ // step-inversion if we know this loop is outside the current loop.
+ bool TryNonMatchingSCEV =
+ IVIncInsertLoop &&
+ SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+
+ for (auto &I : *L->getHeader()) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN || !SE.isSCEVable(PN->getType()))
+ continue;
+
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+ if (!PhiSCEV)
+ continue;
+
+ bool IsMatchingSCEV = PhiSCEV == Normalized;
+ // We only handle truncation and inversion of phi recurrences for the
+ // expanded expression if the expanded expression's loop dominates the
+ // loop we insert into. Check now, so we can bail out early.
+ if (!IsMatchingSCEV && !TryNonMatchingSCEV)
+ continue;
+
+ Instruction *TempIncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ // Check whether we can reuse this PHI node.
+ if (LSRMode) {
+ if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
+ continue;
+ if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
+ continue;
+ } else {
+ if (!isNormalAddRecExprPHI(PN, TempIncV, L))
+ continue;
+ }
+
+ // Stop if we have found an exact match SCEV.
+ if (IsMatchingSCEV) {
+ IncV = TempIncV;
+ TruncTy = nullptr;
+ InvertStep = false;
+ AddRecPhiMatch = PN;
+ break;
+ }
+
+ // Try whether the phi can be translated into the requested form
+ // (truncated and/or offset by a constant).
+ if ((!TruncTy || InvertStep) &&
+ canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+ // Record the phi node, but don't stop; we might find an exact match
+ // later.
+ AddRecPhiMatch = PN;
+ IncV = TempIncV;
+ TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+ }
+ }
+
+ if (AddRecPhiMatch) {
+ // Potentially, move the increment. We have made sure in
+ // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+ if (L == IVIncInsertLoop)
+ hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(AddRecPhiMatch);
+ // Remember the increment.
+ rememberInstruction(IncV);
+ return AddRecPhiMatch;
+ }
+ }
+
+ // Save the original insertion point so we can restore it when we're done.
+ BuilderType::InsertPointGuard Guard(Builder);
+
+ // Another AddRec may need to be recursively expanded below. For example, if
+ // this AddRec is quadratic, the StepV may itself be an AddRec in this
+ // loop. Remove this loop from the PostIncLoops set before expanding such
+ // AddRecs. Otherwise, we cannot find a valid position for the step
+ // (i.e. StepV can never dominate its loop header). Ideally, we could do
+ // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
+ // so it's not worth implementing SmallPtrSet::swap.
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops;
+ PostIncLoops.clear();
+
+ // Expand code for the start value.
+ Value *StartV =
+ expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());
+
+ // StartV must be hoisted into L's preheader to dominate the new phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+
+ // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+ // we actually do emit an addition. It does not apply if we emit a
+ // subtraction.
+ bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+ bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
+ // Create the PHI.
+ BasicBlock *Header = L->getHeader();
+ Builder.SetInsertPoint(Header, Header->begin());
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
+ rememberInstruction(PN);
+
+ // Create the step instructions and populate the PHI.
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *Pred = *HPI;
+
+ // Add a start value.
+ if (!L->contains(Pred)) {
+ PN->addIncoming(StartV, Pred);
+ continue;
+ }
+
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
+ Instruction *InsertPos = L == IVIncInsertLoop ?
+ IVIncInsertPos : Pred->getTerminator();
+ Builder.SetInsertPoint(InsertPos);
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
+ if (isa<OverflowingBinaryOperator>(IncV)) {
+ if (IncrementIsNUW)
+ cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
+ if (IncrementIsNSW)
+ cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
+ }
+ PN->addIncoming(IncV, Pred);
+ }
+
+ // After expanding subexpressions, restore the PostIncLoops set so the caller
+ // can ensure that IVIncrement dominates the current uses.
+ PostIncLoops = SavedPostIncLoops;
+
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+
+ return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+ Type *STy = S->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(STy);
+ const Loop *L = S->getLoop();
+
+ // Determine a normalized form of this expression, which is the expression
+ // before any post-inc adjustment is made.
+ const SCEVAddRecExpr *Normalized = S;
+ if (PostIncLoops.count(L)) {
+ PostIncLoopSet Loops;
+ Loops.insert(L);
+ Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse(
+ Normalize, S, nullptr, nullptr, Loops, SE, SE.DT));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec start.
+ const SCEV *Start = Normalized->getStart();
+ const SCEV *PostLoopOffset = nullptr;
+ if (!SE.properlyDominates(Start, L->getHeader())) {
+ PostLoopOffset = Start;
+ Start = SE.getConstant(Normalized->getType(), 0);
+ Normalized = cast<SCEVAddRecExpr>(
+ SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+ Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
+ }
+
+ // Strip off any non-loop-dominating component from the addrec step.
+ const SCEV *Step = Normalized->getStepRecurrence(SE);
+ const SCEV *PostLoopScale = nullptr;
+ if (!SE.dominates(Step, L->getHeader())) {
+ PostLoopScale = Step;
+ Step = SE.getConstant(Normalized->getType(), 1);
+ Normalized =
+ cast<SCEVAddRecExpr>(SE.getAddRecExpr(
+ Start, Step, Normalized->getLoop(),
+ Normalized->getNoWrapFlags(SCEV::FlagNW)));
+ }
+
+ // Expand the core addrec. If we need post-loop scaling, force it to
+ // expand to an integer type to avoid the need for additional casting.
+ Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ // In some cases, we decide to reuse an existing phi node but need to truncate
+ // it and/or invert the step.
+ Type *TruncTy = nullptr;
+ bool InvertStep = false;
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
+ TruncTy, InvertStep);
+
+ // Accommodate post-inc mode, if necessary.
+ Value *Result;
+ if (!PostIncLoops.count(L))
+ Result = PN;
+ else {
+ // In PostInc mode, use the post-incremented value.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+ Result = PN->getIncomingValueForBlock(LatchBlock);
+
+ // For an expansion to use the postinc form, the client must call
+ // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
+ // or dominated by IVIncInsertPos.
+ if (isa<Instruction>(Result) &&
+ !SE.DT.dominates(cast<Instruction>(Result),
+ &*Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outside the loop whose operand is replaced
+ // during expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ BuilderType::InsertPointGuard Guard(Builder);
+ StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
+ }
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
+ }
+
+ // We have decided to reuse an induction variable of a dominating loop. Apply
+ // truncation and/or inversion of the step.
+ if (TruncTy) {
+ Type *ResTy = Result->getType();
+ // Normalize the result type.
+ if (ResTy != SE.getEffectiveSCEVType(ResTy))
+ Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
+ // Truncate the result.
+ if (TruncTy != Result->getType()) {
+ Result = Builder.CreateTrunc(Result, TruncTy);
+ rememberInstruction(Result);
+ }
+ // Invert the result.
+ if (InvertStep) {
+ Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+ Result);
+ rememberInstruction(Result);
+ }
+ }
+
+ // Re-apply any non-loop-dominating scale.
+ if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateMul(Result,
+ expandCodeFor(PostLoopScale, IntTy));
+ rememberInstruction(Result);
+ }
+
+ // Re-apply any non-loop-dominating offset.
+ if (PostLoopOffset) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ const SCEV *const OffsetArray[1] = { PostLoopOffset };
+ Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ } else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
+ Result = Builder.CreateAdd(Result,
+ expandCodeFor(PostLoopOffset, IntTy));
+ rememberInstruction(Result);
+ }
+ }
+
+ return Result;
+}
+
+Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+ if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ const Loop *L = S->getLoop();
+
+ // First check for an existing canonical IV in a suitable type.
+ PHINode *CanonicalIV = nullptr;
+ if (PHINode *PN = L->getCanonicalInductionVariable())
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ CanonicalIV = PN;
+
+ // Rewrite an AddRec in terms of the canonical induction variable, if
+ // its type is narrower.
+ if (CanonicalIV &&
+ SE.getTypeSizeInBits(CanonicalIV->getType()) >
+ SE.getTypeSizeInBits(Ty)) {
+ SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
+ for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
+ NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
+ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+ S->getNoWrapFlags(SCEV::FlagNW)));
+ BasicBlock::iterator NewInsertPt =
+ findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
+ &*NewInsertPt);
+ return V;
+ }
+
+ // {X,+,F} --> X + {0,+,F}
+ if (!S->getStart()->isZero()) {
+ SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
+ NewOps[0] = SE.getConstant(Ty, 0);
+ const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
+ S->getNoWrapFlags(SCEV::FlagNW));
+
+ // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
+ // comments on expandAddToGEP for details.
+ const SCEV *Base = S->getStart();
+ const SCEV *RestArray[1] = { Rest };
+ // Dig into the expression to find the pointer base for a GEP.
+ ExposePointerBase(Base, RestArray[0], SE);
+ // If we found a pointer, expand the AddRec with a GEP.
+ if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
+ // Make sure the Base isn't something exotic, such as a multiplied
+ // or divided pointer value. In those cases, the result type isn't
+ // actually a pointer type.
+ if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
+ Value *StartV = expand(Base);
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
+ }
+ }
+
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+ SE.getUnknown(expand(Rest))));
+ }
+
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
+ // Create and insert the PHI node for the induction variable in the
+ // specified loop.
+ BasicBlock *Header = L->getHeader();
+ pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+ CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+ &Header->front());
+ rememberInstruction(CanonicalIV);
+
+ SmallSet<BasicBlock *, 4> PredSeen;
+ Constant *One = ConstantInt::get(Ty, 1);
+ for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP).second) {
+ // There must be an incoming value for each predecessor, even the
+ // duplicates!
+ CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
+ continue;
+ }
+
+ if (L->contains(HP)) {
+ // Insert a unit add instruction right before the terminator
+ // corresponding to the back-edge.
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
+ rememberInstruction(Add);
+ CanonicalIV->addIncoming(Add, HP);
+ } else {
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
+ }
+ }
+ }
+
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
+ // {0,+,F} --> {0,+,1} * F
+
+ // If this is a simple linear addrec, emit it now as a special case.
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ CanonicalIV->getType())),
+ Ty));
+
+ // If this is a chain of recurrences, turn it into a closed form, using the
+ // folders, then expandCodeFor the closed form. This allows the folders to
+ // simplify the expression without having to build a bunch of special code
+ // into this folder.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
+
+ // Promote S up to the canonical IV type, if the cast is foldable.
+ const SCEV *NewS = S;
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
+ if (isa<SCEVAddRecExpr>(Ext))
+ NewS = Ext;
+
+ const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
+ //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
+
+ // Truncate the result down to the original type, if needed.
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty);
+ return expand(T);
+}
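+
+// Worked example for the affine case (hypothetical loop %L): given the
+// canonical IV %indvar = {0,+,1}<%L>, the recurrence {0,+,3}<%L> expands via
+// the "{0,+,F} --> i*F" rule to
+//
+//   %t = mul i64 %indvar, 3
+//
+// and a nonzero start X is handled earlier as X + {0,+,F}.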
+
+Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateTrunc(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateZExt(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
+ Type *Ty = SE.getEffectiveSCEVType(S->getType());
+ Value *V = expandCodeFor(S->getOperand(),
+ SE.getEffectiveSCEVType(S->getOperand()->getType()));
+ Value *I = Builder.CreateSExt(V, Ty);
+ rememberInstruction(I);
+ return I;
+}
+
+Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
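+
+// Lowering sketch (assumed i64 operands): smax(%a, %b) becomes
+//
+//   %cmp = icmp sgt i64 %a, %b
+//   %smax = select i1 %cmp, i64 %a, i64 %b
+//
+// with further operands folded in one select at a time; visitUMaxExpr below
+// is identical except for the unsigned predicate.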
+
+Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands()-2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ if (S->getOperand(i)->getType() != Ty) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
+ Instruction *IP) {
+ assert(IP);
+ Builder.SetInsertPoint(IP);
+ return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
+ // Expand the code for this SCEV.
+ Value *V = expand(SH);
+ if (Ty) {
+ assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+ "non-trivial casts should be done with the SCEVs directly!");
+ V = InsertNoopCastOfTo(V, Ty);
+ }
+ return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ Instruction *InsertPt = &*Builder.GetInsertPoint();
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+ L = L->getParentLoop())
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else {
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ }
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ while (InsertPt != Builder.GetInsertPoint()
+ && (isInsertedInstruction(InsertPt)
+ || isa<DbgInfoIntrinsic>(InsertPt))) {
+ InsertPt = &*std::next(InsertPt->getIterator());
+ }
+ break;
+ }
+
+ // Check to see if we already expanded this here.
+ auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
+ if (I != InsertedExpressions.end())
+ return I->second;
+
+ BuilderType::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(InsertPt);
+
+ // Expand the expression into instructions.
+ Value *V = visit(S);
+
+ // Remember the expanded value for this SCEV at this location.
+ //
+ // This is independent of PostIncLoops. The mapped value simply materializes
+ // the expression at this insertion point. If the mapped value happened to be
+ // a postinc expansion, it could be reused by a non-postinc user, but only if
+ // its insertion point was already at the head of the loop.
+ InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+ return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
+ InsertedValues.insert(I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
+PHINode *
+SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
+ Type *Ty) {
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
+ // Conservatively use FlagAnyWrap for now.
+ const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
+ SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
+
+ // Emit code for it.
+ BuilderType::InsertPointGuard Guard(Builder);
+ PHINode *V =
+ cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));
+
+ return V;
+}
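+
+// Usage sketch (assumes an in-scope SCEVExpander `Exp`, a Loop *L, and an
+// LLVMContext `Ctx`; names are illustrative only):
+//
+//   PHINode *IV =
+//       Exp.getOrInsertCanonicalInductionVariable(L, Type::getInt64Ty(Ctx));
+//
+// The result is a {0,+,1} PHI in L's header that later expansions can reuse.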
+
+/// replaceCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their most canonical representative. Return the number of
+/// phis eliminated.
+///
+/// This does not depend on any SCEVExpander state but should be used in
+/// the same context that SCEVExpander is used.
+unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetTransformInfo *TTI) {
+ // Find integer phis in order of increasing width.
+ SmallVector<PHINode*, 8> Phis;
+ for (auto &I : *L->getHeader()) {
+ if (auto *PN = dyn_cast<PHINode>(&I))
+ Phis.push_back(PN);
+ else
+ break;
+ }
+
+ if (TTI)
+ std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+ // Put pointers at the back and make sure pointer < pointer = false.
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
+ return RHS->getType()->getPrimitiveSizeInBits() <
+ LHS->getType()->getPrimitiveSizeInBits();
+ });
+
+ unsigned NumElim = 0;
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ // Process phis from wide to narrow. Map wide phis to their truncation
+ // so narrow phis can reuse them.
+ for (PHINode *Phi : Phis) {
+ auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
+ if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC))
+ return V;
+ if (!SE.isSCEVable(PN->getType()))
+ return nullptr;
+ auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
+ if (!Const)
+ return nullptr;
+ return Const->getValue();
+ };
+
+ // Fold constant phis. They may be congruent to other constant phis and
+ // would confuse the logic below that expects proper IVs.
+ if (Value *V = SimplifyPHINode(Phi)) {
+ if (V->getType() != Phi->getType())
+ continue;
+ Phi->replaceAllUsesWith(V);
+ DeadInsts.emplace_back(Phi);
+ ++NumElim;
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
+ continue;
+ }
+
+ if (!SE.isSCEVable(Phi->getType()))
+ continue;
+
+ PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
+ if (!OrigPhiRef) {
+ OrigPhiRef = Phi;
+ if (Phi->getType()->isIntegerTy() && TTI
+ && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
+ continue;
+ }
+
+ // Replacing a pointer phi with an integer phi or vice-versa doesn't make
+ // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
+ continue;
+
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+
+ // If this phi has the same width but is more canonical, replace the
+ // original with it. As part of the "more canonical" determination,
+ // respect a prior decision to use an IV chain.
+ if (OrigPhiRef->getType() == Phi->getType()
+ && !(ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L))
+ && (ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
+ std::swap(OrigPhiRef, Phi);
+ std::swap(OrigInc, IsomorphicInc);
+ }
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's often the head of an IV user cycle that
+ // is isomorphic with the original phi. It's worth eagerly cleaning up the
+ // common case of a single IV increment so that DeleteDeadPHIs can remove
+ // cycles that had postinc uses.
+ const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc),
+ IsomorphicInc->getType());
+ if (OrigInc != IsomorphicInc
+ && TruncExpr == SE.getSCEV(IsomorphicInc)
+ && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc))
+ || hoistIVInc(OrigInc, IsomorphicInc))) {
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ Value *NewInc = OrigInc;
+ if (OrigInc->getType() != IsomorphicInc->getType()) {
+ Instruction *IP = nullptr;
+ if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
+ IP = &*PN->getParent()->getFirstInsertionPt();
+ else
+ IP = OrigInc->getNextNode();
+
+ IRBuilder<> Builder(IP);
+ Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
+ NewInc = Builder.
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
+ }
+ IsomorphicInc->replaceAllUsesWith(NewInc);
+ DeadInsts.emplace_back(IsomorphicInc);
+ }
+ }
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElim;
+ Value *NewIV = OrigPhiRef;
+ if (OrigPhiRef->getType() != Phi->getType()) {
+ IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
+ }
+ Phi->replaceAllUsesWith(NewIV);
+ DeadInsts.emplace_back(Phi);
+ }
+ return NumElim;
+}
+
+Value *SCEVExpander::findExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ using namespace llvm::PatternMatch;
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Look for suitable value in simple conditions at the loop exits.
+ for (BasicBlock *BB : ExitingBlocks) {
+ ICmpInst::Predicate Pred;
+ Instruction *LHS, *RHS;
+ BasicBlock *TrueBB, *FalseBB;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+ TrueBB, FalseBB)))
+ continue;
+
+ if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+ return LHS;
+
+ if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+ return RHS;
+ }
+
+ // There is potential to make this significantly smarter, but this simple
+ // heuristic already gets some interesting cases.
+
+ // Cannot find a suitable value.
+ return nullptr;
+}
+
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEV *S, Loop *L, const Instruction *At,
+ SmallPtrSetImpl<const SCEV *> &Processed) {
+
+ // If we can find an existing value for this SCEV available at the point
+ // "At", then consider the expression cheap.
+ if (At && findExistingExpansion(S, At, L) != nullptr)
+ return false;
+
+ // Zero/One operand expressions
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false;
+ case scTruncate:
+ return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
+ L, At, Processed);
+ case scZeroExtend:
+ return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+ L, At, Processed);
+ case scSignExtend:
+ return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
+ L, At, Processed);
+ }
+
+ if (!Processed.insert(S).second)
+ return false;
+
+ if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+ // If the divisor is a power of two and the SCEV type fits in a native
+ // integer, consider the division cheap irrespective of whether it occurs in
+ // the user code since it can be lowered into a right shift.
+ if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+ if (SC->getAPInt().isPowerOf2()) {
+ const DataLayout &DL =
+ L->getHeader()->getParent()->getParent()->getDataLayout();
+ unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+ return DL.isIllegalInteger(Width);
+ }
+
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+ // simple searching, assume the former and consider UDivExpr expensive to
+ // compute.
+ BasicBlock *ExitingBB = L->getExitingBlock();
+ if (!ExitingBB)
+ return true;
+
+ // At the beginning of this function we already tried to find an existing
+ // value for plain 'S'. Now try to look up 'S + 1', since it is a common
+ // pattern involving division. This is just a simple search heuristic.
+ if (!At)
+ At = &ExitingBB->back();
+ if (!findExistingExpansion(
+ SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
+ return true;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // Recurse past n-ary expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+ // they are not too expensive to rematerialize.
+ if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
+ for (auto *Op : NAry->operands())
+ if (isHighCostExpansionHelper(Op, L, At, Processed))
+ return true;
+ }
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
+ return false;
+}
+
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+ Instruction *IP) {
+ assert(IP);
+ switch (Pred->getKind()) {
+ case SCEVPredicate::P_Union:
+ return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+ case SCEVPredicate::P_Equal:
+ return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+ }
+ llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
+ Instruction *IP) {
+ Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+ Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+ Builder.SetInsertPoint(IP);
+ auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+ return I;
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+ Instruction *IP) {
+ auto *BoolType = IntegerType::get(IP->getContext(), 1);
+ Value *Check = ConstantInt::getNullValue(BoolType);
+
+ // Loop over all checks in this set.
+ for (auto Pred : Union->getPredicates()) {
+ auto *NextCheck = expandCodeForPredicate(Pred, IP);
+ Builder.SetInsertPoint(IP);
+ Check = Builder.CreateOr(Check, NextCheck);
+ }
+
+ return Check;
+}
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check is
+// only needed when the expression includes some subexpression that is not IV
+// derived.
+//
+// Currently, we only allow division by a nonzero constant here. If this is
+// inadequate, we could easily allow division by SCEVUnknown by using
+// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
+struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
+ bool IsUnsafe;
+
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
+ }
+ bool isDone() const { return IsUnsafe; }
+};
+}
+
+namespace llvm {
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
+ visitAll(S, Search);
+ return !Search.IsUnsafe;
+}
+}
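+
+// Typical guard at a call site (sketch; `Expander`, `S`, and `InsertPt` are
+// assumed to exist):
+//
+//   if (isSafeToExpand(S, SE))
+//     V = Expander.expandCodeFor(S, S->getType(), InsertPt);
+//
+// so that possibly-zero divisors and non-affine steps are never materialized.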
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
new file mode 100644
index 0000000..b7fd5d5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -0,0 +1,254 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the pre-inc or the
+/// post-inc value. If this user should use the post-inc version of the IV,
+/// return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User)) return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
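+
+// Concrete case (assumed loop): in "for (i = 0; i != n; ++i)", a user of i in
+// the loop's exit block is dominated by the latch, so its expression can be
+// rewritten in terms of the post-inc value i+1, avoiding an extra live range
+// for the pre-inc PHI.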
+
+namespace {
+
+/// Hold the state used during post-inc expression transformation, including a
+/// map of transformed expressions.
+class PostIncTransform {
+ TransformKind Kind;
+ PostIncLoopSet &Loops;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+ DenseMap<const SCEV*, const SCEV*> Transformed;
+
+public:
+ PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
+ ScalarEvolution &se, DominatorTree &dt):
+ Kind(kind), Loops(loops), SE(se), DT(dt) {}
+
+ const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
+ Value *OperandValToReplace);
+
+protected:
+ const SCEV *TransformImpl(const SCEV *S, Instruction *User,
+ Value *OperandValToReplace);
+};
+
+} // namespace
+
+/// Implement post-inc transformation for all valid expression types.
+const SCEV *PostIncTransform::
+TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+ if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+ const SCEV *O = X->getOperand();
+ const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
+ if (O != N)
+ switch (S->getSCEVType()) {
+ case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+ case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+ case scTruncate: return SE.getTruncateExpr(N, S->getType());
+ default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = &L->getHeader()->front();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ Operands.push_back(TransformSubExpr(*I, LUser, nullptr));
+ }
+ // Conservatively use AnyWrap until/unless we need FlagNW.
+ const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
+ switch (Kind) {
+ case NormalizeAutodetect:
+ // Normalize this SCEV by subtracting the expression for the final step.
+ // We only allow affine AddRecs to be normalized, otherwise we would not
+ // be able to correctly denormalize.
+ // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
+ // Normalized form: {-2,+,1,+,2}
+ // Denormalized form: {1,+,3,+,2}
+ //
+ // However, denormalization would use a different step expression than
+ // normalization (see getPostIncExpr), generating the wrong final
+ // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
+ if (AR->isAffine() &&
+ IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ // We want to normalize the step expression, because otherwise we might not
+ // be able to denormalize to the original expression.
+ //
+ // Here is an example of what happens if we don't normalize the step:
+ // ORIGINAL ISE:
+ // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
+ // NORMALIZED ISE:
+ // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
+ // (100 /u {0,+,1}<%bb16>)}<%bb25>
+ // DENORMALIZED BACK ISE:
+ // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
+ // (100 /u {1,+,1}<%bb16>)}<%bb25>
+ // Note that the initial value changes after normalization +
+ // denormalization, which isn't correct.
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ // Here we want to normalize step expressions for the same reasons as
+ // stated above.
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformSubExpr(AR->getStepRecurrence(SE),
+ User, OperandValToReplace);
+ Result = SE.getAddExpr(Result, TransformedStep);
+ }
+ break;
+ }
+ return Result;
+ }
+
+ if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+ SmallVector<const SCEV *, 8> Operands;
+ bool Changed = false;
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
+ Changed |= N != O;
+ Operands.push_back(N);
+ }
+ // If any operand actually changed, return a transformed result.
+ if (Changed)
+ switch (S->getSCEVType()) {
+ case scAddExpr: return SE.getAddExpr(Operands);
+ case scMulExpr: return SE.getMulExpr(Operands);
+ case scSMaxExpr: return SE.getSMaxExpr(Operands);
+ case scUMaxExpr: return SE.getUMaxExpr(Operands);
+ default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+ }
+ return S;
+ }
+
+ if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEV *LO = X->getLHS();
+ const SCEV *RO = X->getRHS();
+ const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
+ const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
+ if (LO != LN || RO != RN)
+ return SE.getUDivExpr(LN, RN);
+ return S;
+ }
+
+ llvm_unreachable("Unexpected SCEV kind!");
+}
+
+/// Manage recursive transformation across an expression DAG. Revisiting
+/// expressions would lead to exponential recursion.
+const SCEV *PostIncTransform::
+TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+ if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+ return S;
+
+ const SCEV *Result = Transformed.lookup(S);
+ if (Result)
+ return Result;
+
+ Result = TransformImpl(S, User, OperandValToReplace);
+ Transformed[S] = Result;
+ return Result;
+}
+
+/// Top level driver for transforming an expression DAG into its requested
+/// post-inc form (either "Normalized" or "Denormalized").
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+ const SCEV *S,
+ Instruction *User,
+ Value *OperandValToReplace,
+ PostIncLoopSet &Loops,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ PostIncTransform Transform(Kind, Loops, SE, DT);
+ return Transform.TransformSubExpr(S, User, OperandValToReplace);
+}
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
new file mode 100644
index 0000000..029997a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -0,0 +1,212 @@
+//===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScopedNoAlias alias-analysis pass, which implements
+// metadata-based scoped no-alias support.
+//
+// Alias-analysis scopes are defined by an id (which can be a string or some
+// other metadata node), a domain node, and an optional descriptive string.
+// A domain is defined by an id (which can be a string or some other metadata
+// node), and an optional descriptive string.
+//
+// !dom0 = metadata !{ metadata !"domain of foo()" }
+// !scope1 = metadata !{ metadata !scope1, metadata !dom0, metadata !"scope 1" }
+// !scope2 = metadata !{ metadata !scope2, metadata !dom0, metadata !"scope 2" }
+//
+// Loads and stores can be tagged with an alias-analysis scope, and also, with
+// a noalias tag for a specific scope:
+//
+// ... = load %ptr1, !alias.scope !{ !scope1 }
+// ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 }
+//
+// When evaluating an aliasing query, if one of the instructions is associated
+// with a set of noalias scopes in some domain that is a superset of the alias
+// scopes in that domain of the other instruction, then the two memory
+// accesses are assumed not to alias.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// A handy option for disabling scoped no-alias functionality. The same effect
+// can also be achieved by stripping the associated metadata tags from IR, but
+// this option is sometimes more convenient.
+static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
+ cl::init(true));
+
+namespace {
+/// AliasScopeNode - This is a simple wrapper around an MDNode which provides
+/// a higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class AliasScopeNode {
+ const MDNode *Node;
+
+public:
+ AliasScopeNode() : Node(nullptr) {}
+ explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this AliasScopeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getDomain - Get the MDNode for this AliasScopeNode's domain.
+ const MDNode *getDomain() const {
+ if (Node->getNumOperands() < 2)
+ return nullptr;
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+};
+} // end of anonymous namespace
+
+AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::alias(LocA, LocB);
+
+ // Get the attached MDNodes.
+ const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
+
+ const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias;
+
+ if (!mayAliasInScopes(AScopes, BNoAlias))
+ return NoAlias;
+
+ if (!mayAliasInScopes(BScopes, ANoAlias))
+ return NoAlias;
+
+ // If they may alias, chain to the next AliasAnalysis.
+ return AAResultBase::alias(LocA, LocB);
+}
+
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::getModRefInfo(CS, Loc);
+
+ if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
+ LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ if (!mayAliasInScopes(
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS, Loc);
+}
+
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableScopedNoAlias)
+ return AAResultBase::getModRefInfo(CS1, CS2);
+
+ if (!mayAliasInScopes(
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ if (!mayAliasInScopes(
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS1, CS2);
+}
+
+void ScopedNoAliasAAResult::collectMDInDomain(
+ const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const {
+ for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i)
+ if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i)))
+ if (AliasScopeNode(MD).getDomain() == Domain)
+ Nodes.insert(MD);
+}
+
+bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
+ const MDNode *NoAlias) const {
+ if (!Scopes || !NoAlias)
+ return true;
+
+ // Collect the set of scope domains relevant to the noalias scopes.
+ SmallPtrSet<const MDNode *, 16> Domains;
+ for (unsigned i = 0, ie = NoAlias->getNumOperands(); i != ie; ++i)
+ if (const MDNode *NAMD = dyn_cast<MDNode>(NoAlias->getOperand(i)))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ Domains.insert(Domain);
+
+ // We alias unless, for some domain, the set of noalias scopes in that domain
+ // is a superset of the set of alias scopes in that domain.
+ for (const MDNode *Domain : Domains) {
+ SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes;
+ collectMDInDomain(NoAlias, Domain, NANodes);
+ collectMDInDomain(Scopes, Domain, ScopeNodes);
+ if (!ScopeNodes.size())
+ continue;
+
+ // To not alias, all of the nodes in ScopeNodes must be in NANodes.
+ bool FoundAll = true;
+ for (const MDNode *SMD : ScopeNodes)
+ if (!NANodes.count(SMD)) {
+ FoundAll = false;
+ break;
+ }
+
+ if (FoundAll)
+ return false;
+ }
+
+ return true;
+}
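+
+// For illustration only, a worked instance of the check above using the
+// metadata from the file header: suppose one access carries
+// !alias.scope !{ !scope1 } and the other carries !noalias !{ !scope1,
+// !scope2 }. Both scopes belong to domain !dom0, so for that domain
+// ScopeNodes = { !scope1 } and NANodes = { !scope1, !scope2 }. Every scope
+// node is covered by a noalias node, so mayAliasInScopes returns false and
+// alias() reports NoAlias.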
+
+ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ return ScopedNoAliasAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char ScopedNoAliasAA::PassID;
+
+char ScopedNoAliasAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true)
+
+ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() {
+ return new ScopedNoAliasAAWrapperPass();
+}
+
+ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) {
+ initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new ScopedNoAliasAAResult(
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 0000000..f5a927b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,347 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "sparseprop"
+
+//===----------------------------------------------------------------------===//
+// AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
+ if (V == UndefVal)
+ OS << "undefined";
+ else if (V == OverdefinedVal)
+ OS << "overdefined";
+ else if (V == UntrackedVal)
+ OS << "untracked";
+ else
+ OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+ if (I != ValueState.end()) return I->second; // Common case, in the map
+
+ LatticeVal LV;
+ if (LatticeFunc->IsUntrackedValue(V))
+ return LatticeFunc->getUntrackedVal();
+ else if (Constant *C = dyn_cast<Constant>(V))
+ LV = LatticeFunc->ComputeConstant(C);
+ else if (Argument *A = dyn_cast<Argument>(V))
+ LV = LatticeFunc->ComputeArgument(A);
+ else if (!isa<Instruction>(V))
+ // All other non-instructions are overdefined.
+ LV = LatticeFunc->getOverdefinedVal();
+ else
+ // All instructions are underdefined by default.
+ LV = LatticeFunc->getUndefVal();
+
+ // If this value is untracked, don't add it to the map.
+ if (LV == LatticeFunc->getUntrackedVal())
+ return LV;
+ return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+ DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+ if (I != ValueState.end() && I->second == V)
+ return; // No change.
+
+ // An update. Visit uses of I.
+ ValueState[&Inst] = V;
+ InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+ BBExecutable.insert(BB); // Basic block is executable!
+ BBWorkList.push_back(BB); // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark the edge from Source to Dest as feasible, making
+/// the destination block executable if it was not already...
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return; // This edge is already known to be executable!
+
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << "\n");
+
+ if (BBExecutable.count(Dest)) {
+ // The destination is already executable, but we just made an edge
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+ visitPHINode(*cast<PHINode>(I));
+
+ } else {
+ MarkBlockExecutable(Dest);
+ }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+ SmallVectorImpl<bool> &Succs,
+ bool AggressiveUndef) {
+ Succs.resize(TI.getNumSuccessors());
+ if (TI.getNumSuccessors() == 0) return;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ LatticeVal BCValue;
+ if (AggressiveUndef)
+ BCValue = getOrInitValueState(BI->getCondition());
+ else
+ BCValue = getLatticeState(BI->getCondition());
+
+ if (BCValue == LatticeFunc->getOverdefinedVal() ||
+ BCValue == LatticeFunc->getUntrackedVal()) {
+ // Overdefined condition variables can branch either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (BCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+ if (!C || !isa<ConstantInt>(C)) {
+ // Non-constant values can go either way.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+    // Constant condition variables mean the branch can only go a single way.
+ Succs[C->isNullValue()] = true;
+ return;
+ }
+
+ if (isa<InvokeInst>(TI)) {
+    // Invoke instructions' successors are always executable.
+ // TODO: Could ask the lattice function if the value can throw.
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ if (isa<IndirectBrInst>(TI)) {
+ Succs.assign(Succs.size(), true);
+ return;
+ }
+
+ SwitchInst &SI = cast<SwitchInst>(TI);
+ LatticeVal SCValue;
+ if (AggressiveUndef)
+ SCValue = getOrInitValueState(SI.getCondition());
+ else
+ SCValue = getLatticeState(SI.getCondition());
+
+ if (SCValue == LatticeFunc->getOverdefinedVal() ||
+ SCValue == LatticeFunc->getUntrackedVal()) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ // If undefined, neither is feasible yet.
+ if (SCValue == LatticeFunc->getUndefVal())
+ return;
+
+ Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+ if (!C || !isa<ConstantInt>(C)) {
+ // All destinations are executable!
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+ SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ Succs[Case.getSuccessorIndex()] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+ bool AggressiveUndef) {
+ SmallVector<bool, 16> SuccFeasible;
+ TerminatorInst *TI = From->getTerminator();
+ getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (TI->getSuccessor(i) == To && SuccFeasible[i])
+ return true;
+
+ return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible, true);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable...
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+ // The lattice function may store more information on a PHINode than could be
+ // computed from its incoming values. For example, SSI form stores its sigma
+ // functions as PHINodes with a single incoming value.
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(PN, IV);
+ return;
+ }
+
+ LatticeVal PNIV = getOrInitValueState(&PN);
+ LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+ // If this value is already overdefined (common) just return.
+ if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64) {
+ UpdateState(PN, Overdefined);
+ return;
+ }
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+ // transfer function to give us the merge of the incoming values.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+ continue;
+
+ // Merge in this value.
+ LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+ if (OpVal != PNIV)
+ PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+ if (PNIV == Overdefined)
+ break; // Rest of input values don't matter.
+ }
+
+  // Update the PHI with the computed value, which is the merge of the inputs.
+ UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+ // PHIs are handled by the propagation logic, they are never passed into the
+ // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ return visitPHINode(*PN);
+
+ // Otherwise, ask the transfer function what the result is. If this is
+ // something that we care about, remember it.
+ LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+ if (IV != LatticeFunc->getUntrackedVal())
+ UpdateState(I, IV);
+
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+ visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+ MarkBlockExecutable(&F.getEntryBlock());
+
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty()) {
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Instruction *I = InstWorkList.back();
+ InstWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
+
+ // "I" got into the work list because it made a transition. See if any
+ // users are both live and in need of updating.
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (BBExecutable.count(UI->getParent())) // Inst is executable?
+ visitInst(*UI);
+ }
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.back();
+ BBWorkList.pop_back();
+
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ visitInst(*I);
+ }
+ }
+}
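+
+// For illustration only (a hedged sketch of client code, not part of this
+// file): a client derives from AbstractLatticeFunction and feeds it to the
+// solver. The class name below is hypothetical, and the constructor
+// signatures are assumed rather than quoted from the header.
+//
+//   struct ConstLattice : public AbstractLatticeFunction {
+//     // ... define ComputeConstant, ComputeArgument,
+//     // ComputeInstructionState, MergeValues, etc. ...
+//   };
+//
+//   ConstLattice LF;
+//   SparseSolver Solver(&LF);
+//   Solver.Solve(F);
+//   Solver.Print(F, dbgs());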
+
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
+ OS << "\nFUNCTION: " << F.getName() << "\n";
+ for (auto &BB : F) {
+ if (!BBExecutable.count(&BB))
+ OS << "INFEASIBLE: ";
+ OS << "\t";
+ if (BB.hasName())
+ OS << BB.getName() << ":\n";
+ else
+ OS << "; anon bb\n";
+ for (auto &I : BB) {
+ LatticeFunc->PrintValue(getLatticeState(&I), OS);
+ OS << I << "\n";
+ }
+
+ OS << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/Analysis/StratifiedSets.h b/contrib/llvm/lib/Analysis/StratifiedSets.h
new file mode 100644
index 0000000..fd3fbc0
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/StratifiedSets.h
@@ -0,0 +1,692 @@
+//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STRATIFIEDSETS_H
+#define LLVM_ADT_STRATIFIEDSETS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include <bitset>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+// \brief An index into Stratified Sets.
+typedef unsigned StratifiedIndex;
+// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
+// ~1M sets exist.
+
+// \brief Container of information related to a value in a StratifiedSet.
+struct StratifiedInfo {
+ StratifiedIndex Index;
+ // For field sensitivity, etc. we can tack attributes on to this struct.
+};
+
+// The number of attributes that StratifiedAttrs should contain. Attributes are
+// described below, and 32 was an arbitrary choice because it fits nicely in 32
+// bits (because we use a bitset for StratifiedAttrs).
+static const unsigned NumStratifiedAttrs = 32;
+
+// These are attributes that the users of StratifiedSets/StratifiedSetBuilders
+// may use for various purposes. These also have the special property of that
+// they are merged down. So, if set A is above set B, and one decides to set an
+// attribute in set A, then the attribute will automatically be set in set B.
+typedef std::bitset<NumStratifiedAttrs> StratifiedAttrs;
+
+// \brief A "link" between two StratifiedSets.
+struct StratifiedLink {
+ // \brief This is a value used to signify "does not exist" where
+ // the StratifiedIndex type is used. This is used instead of
+ // Optional<StratifiedIndex> because Optional<StratifiedIndex> would
+ // eat up a considerable amount of extra memory, after struct
+ // padding/alignment is taken into account.
+ static const StratifiedIndex SetSentinel;
+
+ // \brief The index for the set "above" current
+ StratifiedIndex Above;
+
+ // \brief The link for the set "below" current
+ StratifiedIndex Below;
+
+ // \brief Attributes for these StratifiedSets.
+ StratifiedAttrs Attrs;
+
+ StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
+
+ bool hasBelow() const { return Below != SetSentinel; }
+ bool hasAbove() const { return Above != SetSentinel; }
+
+ void clearBelow() { Below = SetSentinel; }
+ void clearAbove() { Above = SetSentinel; }
+};
+
+// \brief These are stratified sets, as described in "Fast algorithms for
+// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
+// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
+// of Value*s. If two Value*s are in the same set, or if both sets have
+// overlapping attributes, then the Value*s are said to alias.
+//
+// Sets may be related by position, meaning that one set may be considered as
+// above or below another. In CFL Alias Analysis, this gives us an indication
+// of how two variables are related; if the set of variable A is below a set
+// containing variable B, then at some point, a variable that has interacted
+// with B (or B itself) was either used in order to extract the variable A, or
+// was used as storage of variable A.
+//
+// Sets may also have attributes (as noted above). These attributes are
+// generally used for noting whether a variable in the set has interacted with
+// a variable whose origins we don't quite know (i.e. globals/arguments), or if
+// the variable may have had operations performed on it (modified in a function
+// call). All attributes that exist in a set A must exist in all sets marked as
+// below set A.
+template <typename T> class StratifiedSets {
+public:
+ StratifiedSets() {}
+
+ StratifiedSets(DenseMap<T, StratifiedInfo> Map,
+ std::vector<StratifiedLink> Links)
+ : Values(std::move(Map)), Links(std::move(Links)) {}
+
+ StratifiedSets(StratifiedSets<T> &&Other) { *this = std::move(Other); }
+
+ StratifiedSets &operator=(StratifiedSets<T> &&Other) {
+ Values = std::move(Other.Values);
+ Links = std::move(Other.Links);
+ return *this;
+ }
+
+ Optional<StratifiedInfo> find(const T &Elem) const {
+ auto Iter = Values.find(Elem);
+ if (Iter == Values.end()) {
+ return NoneType();
+ }
+ return Iter->second;
+ }
+
+ const StratifiedLink &getLink(StratifiedIndex Index) const {
+ assert(inbounds(Index));
+ return Links[Index];
+ }
+
+private:
+ DenseMap<T, StratifiedInfo> Values;
+ std::vector<StratifiedLink> Links;
+
+ bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
+};
+
+// \brief Generic Builder class that produces StratifiedSets instances.
+//
+// The goal of this builder is to efficiently produce correct StratifiedSets
+// instances. To this end, we use a few tricks:
+// > Set chains (A method for linking sets together)
+// > Set remaps (A method for marking a set as an alias [irony?] of another)
+//
+// ==== Set chains ====
+// This builder has a notion of some value A being above, below, or with some
+// other value B:
+// > The `A above B` relationship implies that there is a reference edge going
+// from A to B. Namely, it notes that A can store anything in B's set.
+// > The `A below B` relationship is the opposite of `A above B`. It implies
+// that there's a dereference edge going from A to B.
+// > The `A with B` relationship states that there's an assignment edge going
+// from A to B, and that A and B should be treated as equals.
+//
+// As an example, take the following code snippet:
+//
+// %a = alloca i32, align 4
+// %ap = alloca i32*, align 8
+// %app = alloca i32**, align 8
+// store %a, %ap
+// store %ap, %app
+// %aw = getelementptr %ap, 0
+//
+// Given this, the following relations exist:
+// - %a below %ap & %ap above %a
+// - %ap below %app & %app above %ap
+// - %aw with %ap & %ap with %aw
+//
+// These relations produce the following sets:
+// [{%a}, {%ap, %aw}, {%app}]
+//
+// ...Which states that the only MayAlias relationship in the above program is
+// between %ap and %aw.
+//
+// Life gets more complicated when we actually have logic in our programs. So,
+// we must either remove this logic from our programs, or make concessions for
+// it in our AA algorithms. In this case, we have decided to select the latter
+// option.
+//
+// First complication: Conditionals
+// Motivation:
+// %ad = alloca int, align 4
+// %a = alloca int*, align 8
+// %b = alloca int*, align 8
+// %bp = alloca int**, align 8
+// %c = call i1 @SomeFunc()
+//  %k = select %c, %a, %b
+// store %ad, %a
+// store %b, %bp
+//
+// %k has 'with' edges to both %a and %b, which ordinarily would not be linked
+// together. So, we merge the set that contains %a with the set that contains
+// %b. We then recursively merge the set above %a with the set above %b, and
+// the set below %a with the set below %b, etc. Ultimately, the sets for this
+// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below
+// {%a, %b, %k}, which in turn is below {%bp}.
+//
+// Second complication: Arbitrary casts
+// Motivation:
+// %ip = alloca int*, align 8
+// %ipp = alloca int**, align 8
+//  %i = bitcast %ipp to int
+// store %ip, %ipp
+// store %i, %ip
+//
+// This is impossible to construct with any of the rules above, because a set
+// containing both %i and %ipp is supposed to exist, the set with %i is supposed
+// to be below the set with %ip, and the set with %ip is supposed to be below
+// the set with %ipp. Because we don't allow circular relationships like this,
+// we merge all concerned sets into one. So, the above code would generate a
+// single StratifiedSet: {%ip, %ipp, %i}.
+//
+// ==== Set remaps ====
+// More of an implementation detail than anything -- when merging sets, we need
+// to update the numbers of all of the elements mapped to those sets. Rather
+// than doing this at each merge, we note in the BuilderLink structure that a
+// remap has occurred, and use this information so we can defer renumbering set
+// elements until build time.
+template <typename T> class StratifiedSetsBuilder {
+ // \brief Represents a Stratified Set, with information about the Stratified
+ // Set above it, the set below it, and whether the current set has been
+ // remapped to another.
+ struct BuilderLink {
+ const StratifiedIndex Number;
+
+ BuilderLink(StratifiedIndex N) : Number(N) {
+ Remap = StratifiedLink::SetSentinel;
+ }
+
+ bool hasAbove() const {
+ assert(!isRemapped());
+ return Link.hasAbove();
+ }
+
+ bool hasBelow() const {
+ assert(!isRemapped());
+ return Link.hasBelow();
+ }
+
+ void setBelow(StratifiedIndex I) {
+ assert(!isRemapped());
+ Link.Below = I;
+ }
+
+ void setAbove(StratifiedIndex I) {
+ assert(!isRemapped());
+ Link.Above = I;
+ }
+
+ void clearBelow() {
+ assert(!isRemapped());
+ Link.clearBelow();
+ }
+
+ void clearAbove() {
+ assert(!isRemapped());
+ Link.clearAbove();
+ }
+
+ StratifiedIndex getBelow() const {
+ assert(!isRemapped());
+ assert(hasBelow());
+ return Link.Below;
+ }
+
+ StratifiedIndex getAbove() const {
+ assert(!isRemapped());
+ assert(hasAbove());
+ return Link.Above;
+ }
+
+ StratifiedAttrs &getAttrs() {
+ assert(!isRemapped());
+ return Link.Attrs;
+ }
+
+ void setAttr(unsigned index) {
+ assert(!isRemapped());
+ assert(index < NumStratifiedAttrs);
+ Link.Attrs.set(index);
+ }
+
+ void setAttrs(const StratifiedAttrs &other) {
+ assert(!isRemapped());
+ Link.Attrs |= other;
+ }
+
+ bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
+
+ // \brief For initial remapping to another set
+ void remapTo(StratifiedIndex Other) {
+ assert(!isRemapped());
+ Remap = Other;
+ }
+
+ StratifiedIndex getRemapIndex() const {
+ assert(isRemapped());
+ return Remap;
+ }
+
+ // \brief Should only be called when we're already remapped.
+ void updateRemap(StratifiedIndex Other) {
+ assert(isRemapped());
+ Remap = Other;
+ }
+
+ // \brief Prefer the above functions to calling things directly on what's
+ // returned from this -- they guard against unexpected calls when the
+ // current BuilderLink is remapped.
+ const StratifiedLink &getLink() const { return Link; }
+
+ private:
+ StratifiedLink Link;
+ StratifiedIndex Remap;
+ };
+
+ // \brief This function performs all of the set unioning/value renumbering
+ // that we've been putting off, and generates a vector<StratifiedLink> that
+ // may be placed in a StratifiedSets instance.
+ void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
+ DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
+ for (auto &Link : Links) {
+ if (Link.isRemapped()) {
+ continue;
+ }
+
+ StratifiedIndex Number = StratLinks.size();
+ Remaps.insert(std::make_pair(Link.Number, Number));
+ StratLinks.push_back(Link.getLink());
+ }
+
+ for (auto &Link : StratLinks) {
+ if (Link.hasAbove()) {
+ auto &Above = linksAt(Link.Above);
+ auto Iter = Remaps.find(Above.Number);
+ assert(Iter != Remaps.end());
+ Link.Above = Iter->second;
+ }
+
+ if (Link.hasBelow()) {
+ auto &Below = linksAt(Link.Below);
+ auto Iter = Remaps.find(Below.Number);
+ assert(Iter != Remaps.end());
+ Link.Below = Iter->second;
+ }
+ }
+
+ for (auto &Pair : Values) {
+ auto &Info = Pair.second;
+ auto &Link = linksAt(Info.Index);
+ auto Iter = Remaps.find(Link.Number);
+ assert(Iter != Remaps.end());
+ Info.Index = Iter->second;
+ }
+ }
+
+  // \brief There's a guarantee in StratifiedLink where all bits set in a
+  // Link.Attrs will be set in the Attrs of all Links "below" it.
+ static void propagateAttrs(std::vector<StratifiedLink> &Links) {
+ const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
+ const auto *Link = &Links[Idx];
+ while (Link->hasAbove()) {
+ Idx = Link->Above;
+ Link = &Links[Idx];
+ }
+ return Idx;
+ };
+
+ SmallSet<StratifiedIndex, 16> Visited;
+ for (unsigned I = 0, E = Links.size(); I < E; ++I) {
+ auto CurrentIndex = getHighestParentAbove(I);
+ if (!Visited.insert(CurrentIndex).second) {
+ continue;
+ }
+
+ while (Links[CurrentIndex].hasBelow()) {
+ auto &CurrentBits = Links[CurrentIndex].Attrs;
+ auto NextIndex = Links[CurrentIndex].Below;
+ auto &NextBits = Links[NextIndex].Attrs;
+ NextBits |= CurrentBits;
+ CurrentIndex = NextIndex;
+ }
+ }
+ }
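+
+  // For illustration only: if Links[0] is above Links[1], which is above
+  // Links[2], and only Links[0].Attrs has bit 3 set, then after
+  // propagateAttrs both Links[1].Attrs and Links[2].Attrs have bit 3 set as
+  // well; attributes only ever flow downward.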
+
+public:
+ // \brief Builds a StratifiedSet from the information we've been given since
+ // either construction or the prior build() call.
+ StratifiedSets<T> build() {
+ std::vector<StratifiedLink> StratLinks;
+ finalizeSets(StratLinks);
+ propagateAttrs(StratLinks);
+ Links.clear();
+ return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
+ }
+
+ std::size_t size() const { return Values.size(); }
+ std::size_t numSets() const { return Links.size(); }
+
+ bool has(const T &Elem) const { return get(Elem).hasValue(); }
+
+ bool add(const T &Main) {
+ if (get(Main).hasValue())
+ return false;
+
+ auto NewIndex = getNewUnlinkedIndex();
+ return addAtMerging(Main, NewIndex);
+ }
+
+ // \brief Restructures the stratified sets as necessary to make "ToAdd" in a
+ // set above "Main". There are some cases where this is not possible (see
+ // above), so we merge them such that ToAdd and Main are in the same set.
+ bool addAbove(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto Index = *indexOf(Main);
+ if (!linksAt(Index).hasAbove())
+ addLinkAbove(Index);
+
+ auto Above = linksAt(Index).getAbove();
+ return addAtMerging(ToAdd, Above);
+ }
+
+ // \brief Restructures the stratified sets as necessary to make "ToAdd" in a
+ // set below "Main". There are some cases where this is not possible (see
+ // above), so we merge them such that ToAdd and Main are in the same set.
+ bool addBelow(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto Index = *indexOf(Main);
+ if (!linksAt(Index).hasBelow())
+ addLinkBelow(Index);
+
+ auto Below = linksAt(Index).getBelow();
+ return addAtMerging(ToAdd, Below);
+ }
+
+ bool addWith(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto MainIndex = *indexOf(Main);
+ return addAtMerging(ToAdd, MainIndex);
+ }
+
+ void noteAttribute(const T &Main, unsigned AttrNum) {
+ assert(has(Main));
+    assert(AttrNum < NumStratifiedAttrs);
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ Link.setAttr(AttrNum);
+ }
+
+ void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ Link.setAttrs(NewAttrs);
+ }
+
+ StratifiedAttrs getAttributes(const T &Main) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto *Link = &linksAt(Info->Index);
+ auto Attrs = Link->getAttrs();
+ while (Link->hasAbove()) {
+ Link = &linksAt(Link->getAbove());
+ Attrs |= Link->getAttrs();
+ }
+
+ return Attrs;
+ }
+
+ bool getAttribute(const T &Main, unsigned AttrNum) {
+    assert(AttrNum < NumStratifiedAttrs);
+ auto Attrs = getAttributes(Main);
+ return Attrs[AttrNum];
+ }
+
+ // \brief Gets the attributes that have been applied to the set that Main
+ // belongs to. It ignores attributes in any sets above the one that Main
+ // resides in.
+ StratifiedAttrs getRawAttributes(const T &Main) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ return Link.getAttrs();
+ }
+
+ // \brief Gets an attribute from the attributes that have been applied to the
+ // set that Main belongs to. It ignores attributes in any sets above the one
+ // that Main resides in.
+ bool getRawAttribute(const T &Main, unsigned AttrNum) {
+    assert(AttrNum < NumStratifiedAttrs);
+ auto Attrs = getRawAttributes(Main);
+ return Attrs[AttrNum];
+ }
+
+private:
+ DenseMap<T, StratifiedInfo> Values;
+ std::vector<BuilderLink> Links;
+
+ // \brief Adds the given element at the given index, merging sets if
+ // necessary.
+ bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
+ StratifiedInfo Info = {Index};
+ auto Pair = Values.insert(std::make_pair(ToAdd, Info));
+ if (Pair.second)
+ return true;
+
+ auto &Iter = Pair.first;
+ auto &IterSet = linksAt(Iter->second.Index);
+ auto &ReqSet = linksAt(Index);
+
+ // Failed to add where we wanted to. Merge the sets.
+ if (&IterSet != &ReqSet)
+ merge(IterSet.Number, ReqSet.Number);
+
+ return false;
+ }
+
+ // \brief Gets the BuilderLink at the given index, taking set remapping into
+ // account.
+ BuilderLink &linksAt(StratifiedIndex Index) {
+ auto *Start = &Links[Index];
+ if (!Start->isRemapped())
+ return *Start;
+
+ auto *Current = Start;
+ while (Current->isRemapped())
+ Current = &Links[Current->getRemapIndex()];
+
+ auto NewRemap = Current->Number;
+
+    // Run through everything that has yet to be updated, and update them to
+    // remap to NewRemap.
+ Current = Start;
+ while (Current->isRemapped()) {
+ auto *Next = &Links[Current->getRemapIndex()];
+ Current->updateRemap(NewRemap);
+ Current = Next;
+ }
+
+ return *Current;
+ }
+
+ // \brief Merges two sets into one another. Assumes that these sets are not
+  // already one and the same.
+ void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
+ assert(inbounds(Idx1) && inbounds(Idx2));
+ assert(&linksAt(Idx1) != &linksAt(Idx2) &&
+ "Merging a set into itself is not allowed");
+
+    // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge
+    // both the given sets, and all sets between them, into one.
+ if (tryMergeUpwards(Idx1, Idx2))
+ return;
+
+ if (tryMergeUpwards(Idx2, Idx1))
+ return;
+
+ // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
+ // We therefore need to merge the two chains together.
+ mergeDirect(Idx1, Idx2);
+ }
+
+ // \brief Merges two sets assuming that the set at `Idx1` is unreachable from
+ // traversing above or below the set at `Idx2`.
+ void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
+ assert(inbounds(Idx1) && inbounds(Idx2));
+
+ auto *LinksInto = &linksAt(Idx1);
+ auto *LinksFrom = &linksAt(Idx2);
+ // Merging everything above LinksInto then proceeding to merge everything
+ // below LinksInto becomes problematic, so we go as far "up" as possible!
+ while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
+ LinksInto = &linksAt(LinksInto->getAbove());
+ LinksFrom = &linksAt(LinksFrom->getAbove());
+ }
+
+ if (LinksFrom->hasAbove()) {
+ LinksInto->setAbove(LinksFrom->getAbove());
+ auto &NewAbove = linksAt(LinksInto->getAbove());
+ NewAbove.setBelow(LinksInto->Number);
+ }
+
+ // Merging strategy:
+ // > If neither has links below, stop.
+ // > If only `LinksInto` has links below, stop.
+ // > If only `LinksFrom` has links below, reset `LinksInto.Below` to
+    //    match `LinksFrom.Below`.
+    //  > If both have links below, deal with those next.
+ while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
+ auto &FromAttrs = LinksFrom->getAttrs();
+ LinksInto->setAttrs(FromAttrs);
+
+ // Remap needs to happen after getBelow(), but before
+ // assignment of LinksFrom
+ auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
+ LinksFrom->remapTo(LinksInto->Number);
+ LinksFrom = NewLinksFrom;
+ LinksInto = &linksAt(LinksInto->getBelow());
+ }
+
+ if (LinksFrom->hasBelow()) {
+ LinksInto->setBelow(LinksFrom->getBelow());
+ auto &NewBelow = linksAt(LinksInto->getBelow());
+ NewBelow.setAbove(LinksInto->Number);
+ }
+
+ LinksFrom->remapTo(LinksInto->Number);
+ }
+
+  // \brief Checks to see if LowerIndex is at a level lower than UpperIndex.
+  // If so, it will merge LowerIndex with UpperIndex (and all of the sets in
+  // between) and return true. Otherwise, it will return false.
+ bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
+ assert(inbounds(LowerIndex) && inbounds(UpperIndex));
+ auto *Lower = &linksAt(LowerIndex);
+ auto *Upper = &linksAt(UpperIndex);
+ if (Lower == Upper)
+ return true;
+
+ SmallVector<BuilderLink *, 8> Found;
+ auto *Current = Lower;
+ auto Attrs = Current->getAttrs();
+ while (Current->hasAbove() && Current != Upper) {
+ Found.push_back(Current);
+ Attrs |= Current->getAttrs();
+ Current = &linksAt(Current->getAbove());
+ }
+
+ if (Current != Upper)
+ return false;
+
+ Upper->setAttrs(Attrs);
+
+ if (Lower->hasBelow()) {
+ auto NewBelowIndex = Lower->getBelow();
+ Upper->setBelow(NewBelowIndex);
+ auto &NewBelow = linksAt(NewBelowIndex);
+ NewBelow.setAbove(UpperIndex);
+ } else {
+ Upper->clearBelow();
+ }
+
+ for (const auto &Ptr : Found)
+ Ptr->remapTo(Upper->Number);
+
+ return true;
+ }
+
+ Optional<const StratifiedInfo *> get(const T &Val) const {
+ auto Result = Values.find(Val);
+ if (Result == Values.end())
+ return NoneType();
+ return &Result->second;
+ }
+
+ Optional<StratifiedInfo *> get(const T &Val) {
+ auto Result = Values.find(Val);
+ if (Result == Values.end())
+ return NoneType();
+ return &Result->second;
+ }
+
+ Optional<StratifiedIndex> indexOf(const T &Val) {
+ auto MaybeVal = get(Val);
+ if (!MaybeVal.hasValue())
+ return NoneType();
+ auto *Info = *MaybeVal;
+ auto &Link = linksAt(Info->Index);
+ return Link.Number;
+ }
+
+ StratifiedIndex addLinkBelow(StratifiedIndex Set) {
+ auto At = addLinks();
+ Links[Set].setBelow(At);
+ Links[At].setAbove(Set);
+ return At;
+ }
+
+ StratifiedIndex addLinkAbove(StratifiedIndex Set) {
+ auto At = addLinks();
+ Links[At].setBelow(Set);
+ Links[Set].setAbove(At);
+ return At;
+ }
+
+ StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
+
+ StratifiedIndex addLinks() {
+ auto Link = Links.size();
+ Links.push_back(BuilderLink(Link));
+ return Link;
+ }
+
+ bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
+};
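+
+// For illustration only (a hedged usage sketch, not part of this file):
+// building the sets for the first example above might look like the
+// following, where A, AP, APP, and AW stand for the hypothetical Value*s
+// %a, %ap, %app, and %aw.
+//
+//   StratifiedSetsBuilder<Value *> Builder;
+//   Builder.add(A);            // {%a}
+//   Builder.addAbove(A, AP);   // put %ap in a set above %a
+//   Builder.addAbove(AP, APP); // put %app in a set above %ap
+//   Builder.addWith(AP, AW);   // %aw joins %ap's set
+//   StratifiedSets<Value *> Sets = Builder.build();
+//   // Sets.find(AP)->Index == Sets.find(AW)->Index, so %ap and %aw alias.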
+}
+#endif // LLVM_ADT_STRATIFIEDSETS_H
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
new file mode 100644
index 0000000..ce38819
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -0,0 +1,634 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
+ "vector-library", cl::Hidden, cl::desc("Vector functions library"),
+ cl::init(TargetLibraryInfoImpl::NoLibrary),
+ cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
+ "No vector functions library"),
+ clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
+ "Accelerate framework"),
+ clEnumValEnd));
+
+const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+#define TLI_DEFINE_STRING
+#include "llvm/Analysis/TargetLibraryInfo.def"
+};
+
+static bool hasSinCosPiStret(const Triple &T) {
+ // Only Darwin variants have _stret versions of combined trig functions.
+ if (!T.isOSDarwin())
+ return false;
+
+ // The ABI is rather complicated on x86, so don't do anything special there.
+ if (T.getArch() == Triple::x86)
+ return false;
+
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
+ return false;
+
+ if (T.isiOS() && T.isOSVersionLT(7, 0))
+ return false;
+
+ return true;
+}
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
+ ArrayRef<const char *> StandardNames) {
+ // Verify that the StandardNames array is in alphabetical order.
+ assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
+ [](const char *LHS, const char *RHS) {
+ return strcmp(LHS, RHS) < 0;
+ }) &&
+ "TargetLibraryInfoImpl function names must be sorted");
+
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn) {
+ TLI.setUnavailable(LibFunc::ldexp);
+ TLI.setUnavailable(LibFunc::ldexpf);
+ TLI.setUnavailable(LibFunc::ldexpl);
+ }
+
+  // There are no library implementations of memcpy and memset for AMD GPUs,
+  // and these can be difficult to lower in the backend.
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn ||
+ T.getArch() == Triple::wasm32 ||
+ T.getArch() == Triple::wasm64) {
+ TLI.setUnavailable(LibFunc::memcpy);
+ TLI.setUnavailable(LibFunc::memset);
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ return;
+ }
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ // All versions of watchOS support it.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.isiOS()) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (!T.isWatchOS()) {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ if (!hasSinCosPiStret(T)) {
+ TLI.setUnavailable(LibFunc::sinpi);
+ TLI.setUnavailable(LibFunc::sinpif);
+ TLI.setUnavailable(LibFunc::cospi);
+ TLI.setUnavailable(LibFunc::cospif);
+ TLI.setUnavailable(LibFunc::sincospi_stret);
+ TLI.setUnavailable(LibFunc::sincospif_stret);
+ }
+
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+
+ if (T.isOSWindows() && !T.isOSCygMing()) {
+ // Win32 does not support long double
+ TLI.setUnavailable(LibFunc::acosl);
+ TLI.setUnavailable(LibFunc::asinl);
+ TLI.setUnavailable(LibFunc::atanl);
+ TLI.setUnavailable(LibFunc::atan2l);
+ TLI.setUnavailable(LibFunc::ceill);
+ TLI.setUnavailable(LibFunc::copysignl);
+ TLI.setUnavailable(LibFunc::cosl);
+ TLI.setUnavailable(LibFunc::coshl);
+ TLI.setUnavailable(LibFunc::expl);
+ TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
+ TLI.setUnavailable(LibFunc::fabsl);
+ TLI.setUnavailable(LibFunc::floorl);
+ TLI.setUnavailable(LibFunc::fmaxl);
+ TLI.setUnavailable(LibFunc::fminl);
+ TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
+ TLI.setUnavailable(LibFunc::ldexpf);
+ TLI.setUnavailable(LibFunc::ldexpl);
+ TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
+ TLI.setUnavailable(LibFunc::powl);
+ TLI.setUnavailable(LibFunc::sinl);
+ TLI.setUnavailable(LibFunc::sinhl);
+ TLI.setUnavailable(LibFunc::sqrtl);
+ TLI.setUnavailable(LibFunc::tanl);
+ TLI.setUnavailable(LibFunc::tanhl);
+
+ // Win32 only has C89 math
+ TLI.setUnavailable(LibFunc::acosh);
+ TLI.setUnavailable(LibFunc::acoshf);
+ TLI.setUnavailable(LibFunc::acoshl);
+ TLI.setUnavailable(LibFunc::asinh);
+ TLI.setUnavailable(LibFunc::asinhf);
+ TLI.setUnavailable(LibFunc::asinhl);
+ TLI.setUnavailable(LibFunc::atanh);
+ TLI.setUnavailable(LibFunc::atanhf);
+ TLI.setUnavailable(LibFunc::atanhl);
+ TLI.setUnavailable(LibFunc::cbrt);
+ TLI.setUnavailable(LibFunc::cbrtf);
+ TLI.setUnavailable(LibFunc::cbrtl);
+ TLI.setUnavailable(LibFunc::exp2);
+ TLI.setUnavailable(LibFunc::exp2f);
+ TLI.setUnavailable(LibFunc::exp2l);
+ TLI.setUnavailable(LibFunc::expm1);
+ TLI.setUnavailable(LibFunc::expm1f);
+ TLI.setUnavailable(LibFunc::expm1l);
+ TLI.setUnavailable(LibFunc::log2);
+ TLI.setUnavailable(LibFunc::log2f);
+ TLI.setUnavailable(LibFunc::log2l);
+ TLI.setUnavailable(LibFunc::log1p);
+ TLI.setUnavailable(LibFunc::log1pf);
+ TLI.setUnavailable(LibFunc::log1pl);
+ TLI.setUnavailable(LibFunc::logb);
+ TLI.setUnavailable(LibFunc::logbf);
+ TLI.setUnavailable(LibFunc::logbl);
+ TLI.setUnavailable(LibFunc::nearbyint);
+ TLI.setUnavailable(LibFunc::nearbyintf);
+ TLI.setUnavailable(LibFunc::nearbyintl);
+ TLI.setUnavailable(LibFunc::rint);
+ TLI.setUnavailable(LibFunc::rintf);
+ TLI.setUnavailable(LibFunc::rintl);
+ TLI.setUnavailable(LibFunc::round);
+ TLI.setUnavailable(LibFunc::roundf);
+ TLI.setUnavailable(LibFunc::roundl);
+ TLI.setUnavailable(LibFunc::trunc);
+ TLI.setUnavailable(LibFunc::truncf);
+ TLI.setUnavailable(LibFunc::truncl);
+
+ // Win32 provides some C99 math with mangled names
+ TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+
+ if (T.getArch() == Triple::x86) {
+ // Win32 on x86 implements single-precision math functions as macros
+ TLI.setUnavailable(LibFunc::acosf);
+ TLI.setUnavailable(LibFunc::asinf);
+ TLI.setUnavailable(LibFunc::atanf);
+ TLI.setUnavailable(LibFunc::atan2f);
+ TLI.setUnavailable(LibFunc::ceilf);
+ TLI.setUnavailable(LibFunc::copysignf);
+ TLI.setUnavailable(LibFunc::cosf);
+ TLI.setUnavailable(LibFunc::coshf);
+ TLI.setUnavailable(LibFunc::expf);
+ TLI.setUnavailable(LibFunc::floorf);
+ TLI.setUnavailable(LibFunc::fminf);
+ TLI.setUnavailable(LibFunc::fmaxf);
+ TLI.setUnavailable(LibFunc::fmodf);
+ TLI.setUnavailable(LibFunc::logf);
+ TLI.setUnavailable(LibFunc::powf);
+ TLI.setUnavailable(LibFunc::sinf);
+ TLI.setUnavailable(LibFunc::sinhf);
+ TLI.setUnavailable(LibFunc::sqrtf);
+ TLI.setUnavailable(LibFunc::tanf);
+ TLI.setUnavailable(LibFunc::tanhf);
+ }
+
+    // Win32 does *not* provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
+ TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::gettimeofday);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
+ TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
+
+    // Win32 does *not* provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ switch (T.getOS()) {
+ case Triple::MacOSX:
+    // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0,
+ // and their names are __exp10 and __exp10f. exp10l is not available on
+ // OS X or iOS.
+ TLI.setUnavailable(LibFunc::exp10l);
+ if (T.isMacOSXVersionLT(10, 9)) {
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ } else {
+ TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ }
+ break;
+ case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
+ TLI.setUnavailable(LibFunc::exp10l);
+ if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
+ (T.isOSVersionLT(9, 0) &&
+ (T.getArch() == Triple::x86 ||
+ T.getArch() == Triple::x86_64)))) {
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ } else {
+ TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ }
+ break;
+ case Triple::Linux:
+    // exp10, exp10f, and exp10l are available on Linux (GLIBC) but are extremely
+ // buggy prior to glibc version 2.18. Until this version is widely deployed
+ // or we have a reasonable detection strategy, we cannot use exp10 reliably
+ // on Linux.
+ //
+ // Fall through to disable all of them.
+ default:
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc::exp10l);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::TvOS:
+ case Triple::WatchOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
+ }
+
+ // The following functions are available on at least FreeBSD:
+ // http://svn.freebsd.org/base/head/lib/libc/string/fls.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c
+ // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c
+ if (!T.isOSFreeBSD()) {
+ TLI.setUnavailable(LibFunc::fls);
+ TLI.setUnavailable(LibFunc::flsl);
+ TLI.setUnavailable(LibFunc::flsll);
+ }
+
+ // The following functions are available on at least Linux:
+ if (!T.isOSLinux()) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
+ }
+
+ TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple(), StandardNames);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T, StandardNames);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
+ : CustomNames(TLI.CustomNames) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
+ : CustomNames(std::move(TLI.CustomNames)) {
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+ VectorDescs = std::move(TLI.VectorDescs);
+ ScalarDescs = std::move(TLI.ScalarDescs);
+}
+
+TargetLibraryInfoImpl &
+TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
+ CustomNames = TLI.CustomNames;
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
+ return *this;
+}
+
+TargetLibraryInfoImpl &
+TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
+ CustomNames = std::move(TLI.CustomNames);
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+ VectorDescs = std::move(TLI.VectorDescs);
+ ScalarDescs = std::move(TLI.ScalarDescs);
+ return *this;
+}
+}
+
+static StringRef sanitizeFunctionName(StringRef funcName) {
+ // Filter out empty names and names containing null bytes; those can't be in
+ // our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return StringRef();
+
+ // Check for \01 prefix that is used to mangle __asm declarations and
+ // strip it if present.
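+ // For example (illustrative), a declaration emitted as "\01__exp10" is
+ // looked up as "__exp10" after the prefix is stripped.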
+ return GlobalValue::getRealLinkageName(funcName);
+}
+
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char *const *Start = &StandardNames[0];
+ const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ const char *const *I = std::lower_bound(
+ Start, End, funcName, [](const char *LHS, StringRef RHS) {
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ });
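+ // Note that the comparator above compares at most funcName.size()
+ // characters, so lower_bound can land on an entry of which funcName is a
+ // strict prefix; the exact-equality check below rejects that case.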
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
+
+void TargetLibraryInfoImpl::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
+
+static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
+ std::strlen(RHS.ScalarFnName)) < 0;
+}
+
+static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
+ std::strlen(RHS.VectorFnName)) < 0;
+}
+
+static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+}
+
+static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
+ VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
+ std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+
+ ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
+ std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
+ enum VectorLibrary VecLib) {
+ switch (VecLib) {
+ case Accelerate: {
+ const VecDesc VecFuncs[] = {
+ // Floating-Point Arithmetic and Auxiliary Functions
+ {"ceilf", "vceilf", 4},
+ {"fabsf", "vfabsf", 4},
+ {"llvm.fabs.f32", "vfabsf", 4},
+ {"floorf", "vfloorf", 4},
+ {"sqrtf", "vsqrtf", 4},
+ {"llvm.sqrt.f32", "vsqrtf", 4},
+
+ // Exponential and Logarithmic Functions
+ {"expf", "vexpf", 4},
+ {"llvm.exp.f32", "vexpf", 4},
+ {"expm1f", "vexpm1f", 4},
+ {"logf", "vlogf", 4},
+ {"llvm.log.f32", "vlogf", 4},
+ {"log1pf", "vlog1pf", 4},
+ {"log10f", "vlog10f", 4},
+ {"llvm.log10.f32", "vlog10f", 4},
+ {"logbf", "vlogbf", 4},
+
+ // Trigonometric Functions
+ {"sinf", "vsinf", 4},
+ {"llvm.sin.f32", "vsinf", 4},
+ {"cosf", "vcosf", 4},
+ {"llvm.cos.f32", "vcosf", 4},
+ {"tanf", "vtanf", 4},
+ {"asinf", "vasinf", 4},
+ {"acosf", "vacosf", 4},
+ {"atanf", "vatanf", 4},
+
+ // Hyperbolic Functions
+ {"sinhf", "vsinhf", 4},
+ {"coshf", "vcoshf", 4},
+ {"tanhf", "vtanhf", 4},
+ {"asinhf", "vasinhf", 4},
+ {"acoshf", "vacoshf", 4},
+ {"atanhf", "vatanhf", 4},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case NoLibrary:
+ break;
+ }
+}
+
+bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), funcName,
+ compareWithScalarFnName);
+ return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+}
+
+StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+ unsigned VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
+ if (I->VectorizationFactor == VF)
+ return I->VectorFnName;
+ ++I;
+ }
+ return StringRef();
+}
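+
+// Illustrative usage (assuming a TargetLibraryInfoImpl built with the
+// Accelerate vector library enabled): getVectorizedFunction("sinf", 4)
+// returns "vsinf" per the table above, while getVectorizedFunction("sinf", 8)
+// returns an empty StringRef because no 8-wide mapping is registered.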
+
+StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
+ unsigned &VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+ if (I == ScalarDescs.end() || StringRef(I->VectorFnName) != F)
+ return StringRef();
+ VF = I->VectorizationFactor;
+ return I->ScalarFnName;
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple())));
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Function &F) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(
+ lookupInfoImpl(Triple(F.getParent()->getTargetTriple())));
+}
+
+TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(Triple T) {
+ std::unique_ptr<TargetLibraryInfoImpl> &Impl =
+ Impls[T.normalize()];
+ if (!Impl)
+ Impl.reset(new TargetLibraryInfoImpl(T));
+
+ return *Impl;
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
+ : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T)
+ : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
+ const TargetLibraryInfoImpl &TLIImpl)
+ : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char TargetLibraryAnalysis::PassID;
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfoWrapperPass::ID = 0;
+
+void TargetLibraryInfoWrapperPass::anchor() {}
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
new file mode 100644
index 0000000..9c1d3fd
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -0,0 +1,406 @@
+//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tti"
+
+namespace {
+/// \brief No-op implementation of the TTI interface using the utility base
+/// classes.
+///
+/// This is used when no target specific information is available.
+struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
+ explicit NoTTIImpl(const DataLayout &DL)
+ : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
+};
+} // end anonymous namespace
+
+TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
+ : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
+
+TargetTransformInfo::~TargetTransformInfo() {}
+
+TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
+ : TTIImpl(std::move(Arg.TTIImpl)) {}
+
+TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
+ TTIImpl = std::move(RHS.TTIImpl);
+ return *this;
+}
+
+int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+ Type *OpTy) const {
+ int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
+ int Cost = TTIImpl->getCallCost(FTy, NumArgs);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ int Cost = TTIImpl->getCallCost(F, Arguments);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
+ int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getUserCost(const User *U) const {
+ int Cost = TTIImpl->getUserCost(U);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+bool TargetTransformInfo::hasBranchDivergence() const {
+ return TTIImpl->hasBranchDivergence();
+}
+
+bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
+ return TTIImpl->isSourceOfDivergence(V);
+}
+
+bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
+ return TTIImpl->isLoweredToCall(F);
+}
+
+void TargetTransformInfo::getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, UP);
+}
+
+bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
+ return TTIImpl->isLegalAddImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
+ return TTIImpl->isLegalICmpImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale,
+ unsigned AddrSpace) const {
+ return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale, AddrSpace);
+}
+
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
+ return TTIImpl->isLegalMaskedStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
+ return TTIImpl->isLegalMaskedLoad(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
+ return TTIImpl->isLegalMaskedGather(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
+ return TTIImpl->isLegalMaskedScatter(DataType);
+}
+
+int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale,
+ unsigned AddrSpace) const {
+ int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale, AddrSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TTIImpl->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
+ return TTIImpl->isProfitableToHoist(I);
+}
+
+bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
+ return TTIImpl->isTypeLegal(Ty);
+}
+
+unsigned TargetTransformInfo::getJumpBufAlignment() const {
+ return TTIImpl->getJumpBufAlignment();
+}
+
+unsigned TargetTransformInfo::getJumpBufSize() const {
+ return TTIImpl->getJumpBufSize();
+}
+
+bool TargetTransformInfo::shouldBuildLookupTables() const {
+ return TTIImpl->shouldBuildLookupTables();
+}
+
+bool TargetTransformInfo::enableAggressiveInterleaving(
+ bool LoopHasReductions) const {
+ return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
+}
+
+bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
+ return TTIImpl->enableInterleavedAccessVectorization();
+}
+
+TargetTransformInfo::PopcntSupportKind
+TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
+ return TTIImpl->getPopcntSupport(IntTyWidthInBit);
+}
+
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+ return TTIImpl->haveFastSqrt(Ty);
+}
+
+int TargetTransformInfo::getFPOpCost(Type *Ty) const {
+ int Cost = TTIImpl->getFPOpCost(Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
+ return TTIImpl->getNumberOfRegisters(Vector);
+}
+
+unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
+ return TTIImpl->getRegisterBitWidth(Vector);
+}
+
+unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+ return TTIImpl->getMaxInterleaveFactor(VF);
+}
+
+int TargetTransformInfo::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
+ int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
+ Type *SubTp) const {
+ int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
+ int Cost = TTIImpl->getCFInstrCost(Opcode);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ int Cost =
+ TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) const {
+ int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace) const {
+ int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
+ return TTIImpl->getNumberOfParts(Tp);
+}
+
+int TargetTransformInfo::getAddressComputationCost(Type *Tp,
+ bool IsComplex) const {
+ int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) const {
+ int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
+unsigned
+TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
+}
+
+bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) const {
+ return TTIImpl->getTgtMemIntrinsic(Inst, Info);
+}
+
+Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
+ IntrinsicInst *Inst, Type *ExpectedType) const {
+ return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+}
+
+bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ return TTIImpl->areInlineCompatible(Caller, Callee);
+}
+
+TargetTransformInfo::Concept::~Concept() {}
+
+TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
+
+TargetIRAnalysis::TargetIRAnalysis(
+ std::function<Result(const Function &)> TTICallback)
+ : TTICallback(TTICallback) {}
+
+TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) {
+ return TTICallback(F);
+}
+
+char TargetIRAnalysis::PassID;
+
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
+ return Result(F.getParent()->getDataLayout());
+}
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
+ "Target Transform Information", false, true)
+char TargetTransformInfoWrapperPass::ID = 0;
+
+void TargetTransformInfoWrapperPass::anchor() {}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
+ : ImmutablePass(ID) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
+ TargetIRAnalysis TIRA)
+ : ImmutablePass(ID), TIRA(std::move(TIRA)) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
+ TTI = TIRA.run(F);
+ return *TTI;
+}
+
+ImmutablePass *
+llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
+ return new TargetTransformInfoWrapperPass(std::move(TIRA));
+}
diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp
new file mode 100644
index 0000000..5a1acc0
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Trace.cpp
@@ -0,0 +1,52 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single-entry, multiple-exit region of code that is often hot. Trace-based
+// optimizations treat traces almost as if they were one large, strange basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+ return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+ return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(raw_ostream &O) const {
+ Function *F = getFunction();
+ O << "; Trace from function " << F->getName() << ", blocks:\n";
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ O << "; ";
+ (*i)->printAsOperand(O, true, getModule());
+ O << "\n";
+ }
+ O << "; Trace parent function: \n" << *F;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 0000000..805f3ef
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,622 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
+//
+// We now support two metadata formats: scalar TBAA and struct-path aware
+// TBAA. After all test cases are upgraded to use struct-path aware TBAA and
+// we can auto-upgrade existing bc files, the support for scalar TBAA can be
+// dropped.
+//
+// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+//
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
+//
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which if equal to 1
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// With struct-path aware TBAA, the MDNodes attached to an instruction using
+// "!tbaa" are called path tag nodes.
+//
+// A path tag node has four fields, the last of which is optional.
+//
+// The first field is the base type node; it can be a struct type node
+// or a scalar type node. The second field is the access type node; it must
+// be a scalar type node. The third field is the offset into the base type.
+// The last field has the same meaning as the last field of the scalar TBAA
+// format: it's an integer which, if equal to 1, indicates that the access is
+// "constant".
+//
+// A struct type node has a name and a list of pairs, one pair for each member
+// of the struct. The first element of each pair is a type node (a struct type
+// node or a scalar type node) specifying the type of the member; the second
+// element of each pair is the offset of the member.
+//
+// Consider this example:
+// typedef struct {
+// short s;
+// } A;
+// typedef struct {
+// uint16_t s;
+// A a;
+// } B;
+//
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
+// instruction. The base type is !4 (struct B), the access type is !2 (scalar
+// type short), and the offset is 4.
+//
+// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
+// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
+// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
+// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
+// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// // Struct type node
+// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+//
+// The struct type nodes and the scalar type nodes form a type DAG.
+// Root (!0)
+// char (!1) -- edge to Root
+// short (!2) -- edge to char
+// A (!3) -- edge with offset 0 to short
+// B (!4) -- edge with offset 0 to short and edge with offset 4 to A
+//
+// To check whether two tags (tagX and tagY) can alias, we start from the base
+// type of tagX, follow the edge with the correct offset in the type DAG, and
+// adjust the offset until we reach the base type of tagY or until we reach
+// the Root node.
+// If we reach the base type of tagY, we compare the adjusted offset with the
+// offset of tagY and return Alias if the offsets are the same, NoAlias
+// otherwise.
+// If we reach the Root node, we repeat the walk starting from the base type
+// of tagY to see if we reach the base type of tagX.
+//
+// If they have different roots, they're part of different potentially
+// unrelated type systems, so we return Alias to be conservative.
+// If neither node is an ancestor of the other and they have the same root,
+// then we say NoAlias.
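+//
+// For example, with the DAG above, a tag {!4, !2, 4} (an access to B.a.s) and
+// a tag {!3, !2, 0} (an access to A.s) may alias: starting from B with offset
+// 4, we follow the offset-4 edge to A and adjust the offset to 0, reaching
+// the other tag's base type with a matching offset.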
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+
+namespace {
+/// TBAANode - This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAANode {
+ const MDNode *Node;
+
+public:
+ TBAANode() : Node(nullptr) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias tree parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+};
+
+/// This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAAStructTagNode {
+ /// This node should be created with createTBAAStructTagNode.
+ const MDNode *Node;
+
+public:
+ explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTagNode.
+ const MDNode *getNode() const { return Node; }
+
+ const MDNode *getBaseType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+ }
+ const MDNode *getAccessType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+ uint64_t getOffset() const {
+ return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ }
+ /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
+ /// objects which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 4)
+ return false;
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+};
+
+/// This is a simple wrapper around an MDNode which provides a
+/// higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class TBAAStructTypeNode {
+ /// This node should be created with createTBAAStructTypeNode.
+ const MDNode *Node;
+
+public:
+ TBAAStructTypeNode() : Node(nullptr) {}
+ explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTypeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// Get this TBAAStructTypeNode's field in the type DAG with
+ /// given offset. Update the offset to be relative to the field type.
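+ /// For example, for struct B in the file header comment, calling getParent
+ /// with Offset == 4 follows the offset-4 edge to A and updates Offset to 0.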
+ TBAAStructTypeNode getParent(uint64_t &Offset) const {
+ // Parent can be omitted for the root node.
+ if (Node->getNumOperands() < 2)
+ return TBAAStructTypeNode();
+
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
+ if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Node->getOperand(2))
+ ->getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+
+ // Assume the offsets are in order. We return the previous field if
+ // the current offset is bigger than the given offset.
+ unsigned TheIdx = 0;
+ for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
+ ->getZExtValue();
+ if (Cur > Offset) {
+ assert(Idx >= 3 &&
+ "TBAAStructTypeNode::getParent should have an offset match!");
+ TheIdx = Idx - 2;
+ break;
+ }
+ }
+ // Move along the last field.
+ if (TheIdx == 0)
+ TheIdx = Node->getNumOperands() - 2;
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
+ ->getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+};
+}
+
+/// Check the first operand of the tbaa tag node. If it is an MDNode, we treat
+/// it as struct-path aware TBAA format; otherwise, we treat it as scalar TBAA
+/// format.
+static bool isStructPathTBAA(const MDNode *MD) {
+ // Anonymous TBAA root starts with an MDNode and dragonegg uses it as
+ // a TBAA tag.
+ return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
+}
+
+AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ if (!EnableTBAA)
+ return AAResultBase::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.AATags.TBAA;
+ if (!AM)
+ return AAResultBase::alias(LocA, LocB);
+ const MDNode *BM = LocB.AATags.TBAA;
+ if (!BM)
+ return AAResultBase::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AAResultBase::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
+}
+
+bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+
+ const MDNode *M = Loc.AATags.TBAA;
+ if (!M)
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ return true;
+
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+}
+
+FunctionModRefBehavior
+TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AAResultBase::getModRefBehavior(CS);
+
+ FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ Min = FMRB_OnlyReadsMemory;
+
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AAResultBase::getModRefBehavior(F);
+}
+
+ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) {
+ if (!EnableTBAA)
+ return AAResultBase::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.AATags.TBAA)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS, Loc);
+}
+
+ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AAResultBase::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return MRI_NoModRef;
+
+ return AAResultBase::getModRefInfo(CS1, CS2);
+}
+
+bool MDNode::isTBAAVtableAccess() const {
+ if (!isStructPathTBAA(this)) {
+ if (getNumOperands() < 1)
+ return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer")
+ return true;
+ }
+ return false;
+ }
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ if (getNumOperands() < 2)
+ return false;
+ MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
+ if (!Tag)
+ return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer")
+ return true;
+ }
+ return false;
+}
+
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return nullptr;
+
+ if (A == B)
+ return A;
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
+ if (StructPath) {
+ A = cast_or_null<MDNode>(A->getOperand(1));
+ if (!A)
+ return nullptr;
+ B = cast_or_null<MDNode>(B->getOperand(1));
+ if (!B)
+ return nullptr;
+ }
+
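+ // Collect each node's parent chain up to its root, then walk the two chains
+ // downward from the root; the deepest node common to both chains is the
+ // most generic type compatible with both A and B.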
+ SmallSetVector<MDNode *, 4> PathA;
+ MDNode *T = A;
+ while (T) {
+ if (PathA.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathA.insert(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
+ : nullptr;
+ }
+
+ SmallSetVector<MDNode *, 4> PathB;
+ T = B;
+ while (T) {
+ if (PathB.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathB.insert(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
+ : nullptr;
+ }
+
+ int IA = PathA.size() - 1;
+ int IB = PathB.size() - 1;
+
+ MDNode *Ret = nullptr;
+ while (IA >= 0 && IB >= 0) {
+ if (PathA[IA] == PathB[IB])
+ Ret = PathA[IA];
+ else
+ break;
+ --IA;
+ --IB;
+ }
+ if (!StructPath)
+ return Ret;
+
+ if (!Ret)
+ return nullptr;
+ // We need to convert from a type node to a tag node.
+ Type *Int64 = IntegerType::get(A->getContext(), 64);
+ Metadata *Ops[3] = {Ret, Ret,
+ ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
+ return MDNode::get(A->getContext(), Ops);
+}
+
+void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
+ if (Merge)
+ N.TBAA =
+ MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
+ else
+ N.TBAA = getMetadata(LLVMContext::MD_tbaa);
+
+ if (Merge)
+ N.Scope = MDNode::getMostGenericAliasScope(
+ N.Scope, getMetadata(LLVMContext::MD_alias_scope));
+ else
+ N.Scope = getMetadata(LLVMContext::MD_alias_scope);
+
+ if (Merge)
+ N.NoAlias =
+ MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
+ else
+ N.NoAlias = getMetadata(LLVMContext::MD_noalias);
+}
+
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
+ // Make sure that both MDNodes are struct-path aware.
+ if (isStructPathTBAA(A) && isStructPathTBAA(B))
+ return PathAliases(A, B);
+
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A);;) {
+ if (T.getNode() == B)
+ // B is an ancestor of A.
+ return true;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B);;) {
+ if (T.getNode() == A)
+ // A is an ancestor of B.
+ return true;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
+ // Verify that both input nodes are struct-path aware.
+ assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
+ assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+
+ // Keep track of the root node for A and B.
+ TBAAStructTypeNode RootA, RootB;
+ TBAAStructTagNode TagA(A), TagB(B);
+
+ // TODO: We need to check if AccessType of TagA encloses AccessType of
+ // TagB to support aggregate AccessType. If yes, return true.
+
+ // Start from the base type of A, follow the edge with the correct offset in
+ // the type DAG and adjust the offset until we reach the base type of B or
+ // until we reach the Root node.
+ // Compare the adjusted offset once we have the same base.
+
+ // Climb the type DAG from base type of A to see if we reach base type of B.
+ const MDNode *BaseA = TagA.getBaseType();
+ const MDNode *BaseB = TagB.getBaseType();
+ uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+ for (TBAAStructTypeNode T(BaseA);;) {
+ if (T.getNode() == BaseB)
+ // Base type of A encloses base type of B, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootA = T;
+ // Follow the edge with the correct offset, OffsetA will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetA);
+ if (!T.getNode())
+ break;
+ }
+
+ // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+ // base type of A.
+ OffsetA = TagA.getOffset();
+ for (TBAAStructTypeNode T(BaseB);;) {
+ if (T.getNode() == BaseA)
+ // Base type of B encloses base type of A, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootB = T;
+ // Follow the edge with the correct offset, OffsetB will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetB);
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
+TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) {
+ return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char TypeBasedAA::PassID;
+
+char TypeBasedAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa",
+ "Type-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
+ false, true)
+
+ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
+ return new TypeBasedAAWrapperPass();
+}
+
+TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
+ initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
+ Result.reset(new TypeBasedAAResult(
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ return false;
+}
+
+bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
+ Result.reset();
+ return false;
+}
+
+void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 0000000..abc57ed
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,4218 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits. The individual options below control how
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
+/// run into compile time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+ cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+ cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+ cl::Hidden,
+ cl::init(20));
+
+// Controls the number of uses of the value searched for possible
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+ cl::Hidden, cl::init(20));
+
+// If true, don't restrict the search to compares whose only use is a branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+ cl::Hidden, cl::init(false));
+
+/// Returns the bitwidth of the given scalar or pointer type; returns 0 if
+/// the bitwidth cannot be determined. For vector types, returns the element
+/// type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+
+ return DL.getPointerTypeSizeInBits(Ty);
+}
+
+// Many of these functions have internal versions that take an assumption
+// exclusion set. This is because of the potential for mutual recursion to
+// cause computeKnownBits to repeatedly visit the same assume intrinsic. The
+// classic case of this is assume(x == y), which will attempt to determine
+// bits in x from bits in y, which will attempt to determine bits in y from
+// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
+// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
+// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so on.
+typedef SmallPtrSet<const Value *, 8> ExclInvsSet;
+
+namespace {
+// Simplifying using an assume can only be done in a particular control-flow
+// context (the context instruction provides that context). If an assume and
+// the context instruction are not in the same block then the DT helps in
+// figuring out if we can use it.
+struct Query {
+ ExclInvsSet ExclInvs;
+ AssumptionCache *AC;
+ const Instruction *CxtI;
+ const DominatorTree *DT;
+
+ Query(AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr)
+ : AC(AC), CxtI(CxtI), DT(DT) {}
+
+ Query(const Query &Q, const Value *NewExcl)
+ : ExclInvs(Q.ExclInvs), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT) {
+ ExclInvs.insert(NewExcl);
+ }
+};
+} // end anonymous namespace
+
+// Given the provided Value and, potentially, a context instruction, return
+// the preferred context instruction (if any).
+static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
+ // If we've been provided with a context instruction, then use that (provided
+ // it has been inserted).
+ if (CxtI && CxtI->getParent())
+ return CxtI;
+
+ // If the value is really an already-inserted instruction, then use that.
+ CxtI = dyn_cast<Instruction>(V);
+ if (CxtI && CxtI->getParent())
+ return CxtI;
+
+ return nullptr;
+}
+
+static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
+
+void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ assert(LHS->getType()->isIntOrIntVectorTy() &&
+ "LHS and RHS should be integers");
+ IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
+ APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT);
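+ // If every bit position is known to be zero in at least one operand, then
+ // no bit can be set in both LHS and RHS.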
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
+
+static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
+
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ const Query &Q, const DataLayout &DL);
+
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT), DL);
+}
+
+static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
+ const Query &Q);
+
+bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ bool NonNegative, Negative;
+ ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
+ return NonNegative;
+}
+
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q);
+
+bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownNonEqual(V1, V2, DL, Query(AC,
+ safeCxtI(V1, safeCxtI(V2, CxtI)),
+ DT));
+}
+
+static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q);
+
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT) {
+ return ::MaskedValueIsZero(V, Mask, DL, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL,
+ unsigned Depth, const Query &Q);
+
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ if (!Add) {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ // If an initial sequence of bits in the result is not needed, the
+ // corresponding bits in the operands are not needed.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // Carry in a 1 for a subtract, rather than a 0.
+ APInt CarryIn(BitWidth, 0);
+ if (!Add) {
+ // Sum = LHS + ~RHS + 1
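+ // Complementing RHS swaps the roles of its known-zero and known-one bits,
+ // which the swap below implements.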
+ std::swap(KnownZero2, KnownOne2);
+ CarryIn.setBit(0);
+ }
+
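+ // PossibleSumZero is the sum assuming every unknown input bit is one;
+ // PossibleSumOne is the sum assuming every unknown input bit is zero.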
+ APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn;
+ APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn;
+
+ // Compute known bits of the carry.
+ APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2);
+ APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2;
+
+ // Compute set of known bits (where all three relevant bits are known).
+ APInt LHSKnown = LHSKnownZero | LHSKnownOne;
+ APInt RHSKnown = KnownZero2 | KnownOne2;
+ APInt CarryKnown = CarryKnownZero | CarryKnownOne;
+ APInt Known = LHSKnown & RHSKnown & CarryKnown;
+
+ assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
+ "known bits of sum differ");
+
+ // Compute known bits of the result.
+ KnownZero = ~PossibleSumOne & Known;
+ KnownOne = PossibleSumOne & Known;
+
+ // Are we still trying to solve for the sign bit?
+ if (!Known.isNegative()) {
+ if (NSW) {
+ // Adding two non-negative numbers, or subtracting a negative number from
+ // a non-negative one, can't wrap into negative.
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // Adding two negative numbers, or subtracting a non-negative number from
+ // a negative one, can't wrap into non-negative.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+}
+
+static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ bool isKnownNegative = false;
+ bool isKnownNonNegative = false;
+ // If the multiplication is known not to overflow, compute the sign bit.
+ if (NSW) {
+ if (Op0 == Op1) {
+ // The product of a number with itself is non-negative.
+ isKnownNonNegative = true;
+ } else {
+ bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+ bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+ bool isKnownNegativeOp1 = KnownOne.isNegative();
+ bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ // The product of two numbers with the same sign is non-negative.
+ isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ // The product of a negative number and a non-negative number is either
+ // negative or zero.
+ if (!isKnownNonNegative)
+ isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ isKnownNonZero(Op0, DL, Depth, Q)) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+ isKnownNonZero(Op1, DL, Depth, Q));
+ }
+ }
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
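+ // An operand with LZ known leading zeros fits in BitWidth - LZ bits, so
+ // the product fits in (BitWidth - LZ0) + (BitWidth - LZ1) bits and has at
+ // least LZ0 + LZ1 - BitWidth leading zeros (clamped below at zero).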
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+
+ // Only make use of no-wrap flags if we failed to compute the sign bit
+ // directly. This matters if the multiplication always overflows, in
+ // which case we prefer to follow the result of the direct computation,
+ // though as the program is invoking undefined behaviour we can choose
+ // whatever we like here.
+ if (isKnownNonNegative && !KnownOne.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (isKnownNegative && !KnownZero.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+}
+
+void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
+ APInt &KnownZero,
+ APInt &KnownOne) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
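+ // Start from "all bits known" and intersect with the bits each range pins
+ // down; a bit stays known only if every range agrees on it.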
+ KnownZero.setAllBits();
+ KnownOne.setAllBits();
+
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+
+ // The first CommonPrefixBits of all values in Range are equal.
+ unsigned CommonPrefixBits =
+ (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
+
+ APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
+ KnownOne &= Range.getUnsignedMax() & Mask;
+ KnownZero &= ~Range.getUnsignedMax() & Mask;
+ }
+}
+
+static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+ SmallVector<const Value *, 16> WorkSet(1, I);
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallPtrSet<const Value *, 16> EphValues;
+
+ // The instruction defining an assumption's condition itself is always
+ // considered ephemeral to that assumption (even if it has other
+ // non-ephemeral users). See r246696's test case for an example.
+ if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+ return true;
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ if (std::all_of(V->user_begin(), V->user_end(),
+ [&](const User *U) { return EphValues.count(U); })) {
+ if (V == E)
+ return true;
+
+ EphValues.insert(V);
+ if (const User *U = dyn_cast<User>(V))
+ for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
+ J != JE; ++J) {
+ if (isSafeToSpeculativelyExecute(*J))
+ WorkSet.push_back(*J);
+ }
+ }
+ }
+
+ return false;
+}
+
+// Is this an intrinsic that cannot be speculated but also cannot trap?
+static bool isAssumeLikeIntrinsic(const Instruction *I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ default: break;
+ // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
+ case Intrinsic::assume:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ return true;
+ }
+
+ return false;
+}
+
+static bool isValidAssumeForContext(Value *V, const Query &Q) {
+ Instruction *Inv = cast<Instruction>(V);
+
+ // There are two restrictions on the use of an assume:
+ // 1. The assume must dominate the context (or the control flow must
+ // reach the assume whenever it reaches the context).
+ // 2. The context must not be in the assume's set of ephemeral values
+ // (otherwise we will use the assume to prove that the condition
+ // feeding the assume is trivially true, thus causing the removal of
+ // the assume).
+
+ if (Q.DT) {
+ if (Q.DT->dominates(Inv, Q.CxtI)) {
+ return true;
+ } else if (Inv->getParent() == Q.CxtI->getParent()) {
+ // The context comes first, but they're both in the same block. Make sure
+ // there is nothing in between that might interrupt the control flow.
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(Q.CxtI)),
+ IE(Inv); I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ return false;
+
+ return !isEphemeralValueOf(Inv, Q.CxtI);
+ }
+
+ return false;
+ }
+
+ // When we don't have a DT, we do a limited search...
+ if (Inv->getParent() == Q.CxtI->getParent()->getSinglePredecessor()) {
+ return true;
+ } else if (Inv->getParent() == Q.CxtI->getParent()) {
+ // Search forward from the assume until we reach the context (or the end
+ // of the block); the common case is that the assume will come first.
+ for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+ IE = Inv->getParent()->end(); I != IE; ++I)
+ if (&*I == Q.CxtI)
+ return true;
+
+ // The context must come first...
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(Q.CxtI)),
+ IE(Inv); I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ return false;
+
+ return !isEphemeralValueOf(Inv, Q.CxtI);
+ }
+
+ return false;
+}
+
+bool llvm::isValidAssumeForContext(const Instruction *I,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isValidAssumeForContext(const_cast<Instruction *>(I),
+ Query(nullptr, CxtI, DT));
+}
+
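+// Commutative variants of the PatternMatch helpers: each matches the given
+// operation with its operands in either order.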
+template<typename LHS, typename RHS>
+inline match_combine_or<CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>,
+ CmpClass_match<RHS, LHS, ICmpInst, ICmpInst::Predicate>>
+m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
+ return m_CombineOr(m_ICmp(Pred, L, R), m_ICmp(Pred, R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::And>,
+ BinaryOp_match<RHS, LHS, Instruction::And>>
+m_c_And(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_And(L, R), m_And(R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Or>,
+ BinaryOp_match<RHS, LHS, Instruction::Or>>
+m_c_Or(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_Or(L, R), m_Or(R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Xor>,
+ BinaryOp_match<RHS, LHS, Instruction::Xor>>
+m_c_Xor(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
+}
+
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction). This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // TODO: We could potentially be more aggressive here. This would be worth
+ // evaluating. If we can, explore commoning this code with the assume
+ // handling logic.
+ if (LHS != V && RHS != V)
+ return;
+
+ const unsigned BitWidth = KnownZero.getBitWidth();
+
+ switch (Cmp->getPredicate()) {
+ default:
+ // We know nothing from this condition
+ break;
+ // TODO: implement unsigned bound from below (known one bits)
+ // TODO: common condition check implementations with assumes
+ // TODO: implement other patterns from assume (e.g. V & B == A)
+ case ICmpInst::ICMP_SGT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
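+ // RHS is either exactly -1 or known to be non-negative, so V >s RHS
+ // implies the sign bit of V is clear.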
+ if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ if (LHS == V)
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ else if (RHS == V)
+ computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ else
+ llvm_unreachable("missing use?");
+ KnownZero |= KnownZeroTemp;
+ KnownOne |= KnownOneTemp;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // The known zero bits carry over
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // Whatever high bits in rhs are zero are known to be zero (if rhs is a
+ // power of 2, then one more).
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+ SignBits++;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ };
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction. In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction. This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth,
+ const Query &Q) {
+ // Need both the dominator tree and the query location to do anything useful
+ if (!Q.DT || !Q.CxtI)
+ return;
+ Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+ // The context instruction might be in a statically unreachable block. If
+ // so, asking dominator queries may yield surprising results. (e.g. the block
+ // may not have a dom tree node)
+ if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
+ return;
+
+ // Avoid useless work
+ if (auto VI = dyn_cast<Instruction>(V))
+ if (VI->getParent() == Cxt->getParent())
+ return;
+
+ // Note: We currently implement two options. It's not clear which of these
+ // will survive long term; we need data for that.
+ // Option 1 - Try walking the dominator tree looking for conditions which
+ // might apply. This works well for local conditions (loop guards, etc..),
+ // but not as well for things far from the context instruction (presuming a
+ // low limit on explored blocks). If we can set a high enough limit, this would
+ // be all we need.
+ // Option 2 - We restrict our search to those conditions which are uses of
+ // the value we're interested in. This is independent of dom structure,
+ // but is slightly less powerful without looking through lots of use chains.
+ // It does handle conditions far from the context instruction (e.g. early
+ // function exits on entry) really well though.
+
+ // Option 1 - Search the dom tree
+ unsigned NumBlocksExplored = 0;
+ BasicBlock *Current = Cxt->getParent();
+ while (true) {
+ // Stop searching if we've gone too far up the chain
+ if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+ break;
+ NumBlocksExplored++;
+
+ if (!Q.DT->getNode(Current)->getIDom())
+ break;
+ Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+ if (!Current)
+ // found function entry
+ break;
+
+ BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cmp)
+ continue;
+
+ // We're looking for conditions that are guaranteed to hold at the context
+ // instruction. Finding a condition where one path dominates the context
+ // isn't enough because both the true and false cases could merge before
+ // the context instruction we're actually interested in. Instead, we need
+ // to ensure that the taken *edge* dominates the context instruction. We
+ // know that the edge must be reachable since we started from a reachable
+ // block.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+
+ // Option 2 - Search the other uses of V
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI || BI->isUnconditional())
+ continue;
+ // We're looking for conditions that are guaranteed to hold at the
+ // context instruction. Finding a condition where one path dominates
+ // the context isn't enough because both the true and false cases could
+ // merge before the context instruction we're actually interested in.
+ // Instead, we need to ensure that the taken *edge* dominates the context
+ // instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+ }
+}
+
+static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
+ APInt &KnownOne, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ // Use of assumptions is context-sensitive. If we don't have a context, we
+ // cannot use them!
+ if (!Q.AC || !Q.CxtI)
+ return;
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ for (auto &AssumeVH : Q.AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ CallInst *I = cast<CallInst>(AssumeVH);
+ assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
+ "Got assumption for the wrong function!");
+ if (Q.ExclInvs.count(I))
+ continue;
+
+ // Warning: This loop can end up being somewhat performance sensitive.
+ // We're running this loop once for each value queried, resulting in a
+ // runtime of ~O(#assumes * #values).
+
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
+
+ Value *Arg = I->getArgOperand(0);
+
+ if (Arg == V && isValidAssumeForContext(I, Q)) {
+ assert(BitWidth == 1 && "assume operand is not i1?");
+ KnownZero.clearAllBits();
+ KnownOne.setAllBits();
+ return;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth == MaxDepth)
+ continue;
+
+ Value *A, *B;
+ auto m_V = m_CombineOr(m_Specific(V),
+ m_CombineOr(m_PtrToInt(m_Specific(V)),
+ m_BitCast(m_Specific(V))));
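+ // m_V matches V itself, or V behind a ptrtoint or bitcast, so an
+ // assumption stated on a casted copy of V still applies to V.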
+
+ CmpInst::Predicate Pred;
+ ConstantInt *C;
+ // assume(v = a)
+ if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ KnownZero |= RHSKnownZero;
+ KnownOne |= RHSKnownOne;
+ // assume(v & b = a)
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
+ computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // known bits from the RHS to V.
+ KnownZero |= RHSKnownZero & MaskKnownOne;
+ KnownOne |= RHSKnownOne & MaskKnownOne;
+ // assume(~(v & b) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
+ computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & MaskKnownOne;
+ KnownOne |= RHSKnownZero & MaskKnownOne;
+ // assume(v | b = a)
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V.
+ KnownZero |= RHSKnownZero & BKnownZero;
+ KnownOne |= RHSKnownOne & BKnownZero;
+ // assume(~(v | b) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & BKnownZero;
+ KnownOne |= RHSKnownZero & BKnownZero;
+ // assume(v ^ b = a)
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V. For those bits in B that are known to be one,
+ // we can propagate inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownZero & BKnownZero;
+ KnownOne |= RHSKnownOne & BKnownZero;
+ KnownZero |= RHSKnownOne & BKnownOne;
+ KnownOne |= RHSKnownZero & BKnownOne;
+ // assume(~(v ^ b) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V. For those bits in B that are
+ // known to be one, we can propagate known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & BKnownZero;
+ KnownOne |= RHSKnownZero & BKnownZero;
+ KnownZero |= RHSKnownZero & BKnownOne;
+ KnownOne |= RHSKnownOne & BKnownOne;
+ // assume(v << c = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
+ KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
+ // assume(~(v << c) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
+ KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
+ // assume(v >> c = a)
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ KnownZero |= RHSKnownZero << C->getZExtValue();
+ KnownOne |= RHSKnownOne << C->getZExtValue();
+ // assume(~(v >> c) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
+ m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C)))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ KnownZero |= RHSKnownOne << C->getZExtValue();
+ KnownOne |= RHSKnownZero << C->getZExtValue();
+ // assume(v >=_s c) where c is non-negative
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownZero.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v >_s c) where c is at least -1.
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <=_s c) where c is negative
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownOne.isNegative()) {
+ // We know that the sign bit is one.
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <_s c) where c is non-positive
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
+ // We know that the sign bit is one.
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <=_u c)
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero.
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ // assume(v <_u c)
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero (if c is a power
+ // of 2, then one more).
+ if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL))
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
+ else
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ }
+ }
+}
+
+// Compute known bits from a shift operator, including those with a
+// non-constant shift amount. KnownZero and KnownOne are the outputs of this
+// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
+// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
+// functors that, given the known-zero or known-one bits respectively, and a
+// shift amount, compute the implied known-zero or known-one bits of the shift
+// operator's result respectively for that shift amount. The results from calling
+// KZF and KOF are conservatively combined for all permitted shift amounts.
+template <typename KZFunctor, typename KOFunctor>
+static void computeKnownBitsFromShiftOperator(Operator *I,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout &DL, unsigned Depth, const Query &Q,
+ KZFunctor KZF, KOFunctor KOF) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
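+ // Clamp the constant shift amount to BitWidth - 1; any larger shift would
+ // make the result undefined anyway.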
+ unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ KnownZero = KZF(KnownZero, ShiftAmt);
+ KnownOne = KOF(KnownOne, ShiftAmt);
+ return;
+ }
+
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+
+ // Note: We cannot use KnownZero.getLimitedValue() here, because if
+ // BitWidth > 64 and any upper bits are known, we'll end up returning the
+ // limit value (which implies all bits are known).
+ uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+
+ // It would be more clearly correct to use the two temporaries for this
+ // calculation, but we reuse the APInts here to avoid unnecessary allocations.
+ KnownZero.clearAllBits(), KnownOne.clearAllBits();
+
+ // If we know the shifter operand is nonzero, we can sometimes infer more
+ // known bits. However this is expensive to compute, so be lazy about it and
+ // only compute it when absolutely necessary.
+ Optional<bool> ShifterOperandIsNonZero;
+
+ // Early exit if we can't constrain any well-defined shift amount.
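+ // If no bit of the shift amount below BitWidth is known, any amount is
+ // possible. If the amount might also be zero, conservatively give up,
+ // since the result could simply equal the unshifted first operand.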
+ if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+ ShifterOperandIsNonZero =
+ isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+ if (!*ShifterOperandIsNonZero)
+ return;
+ }
+
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
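+ // Start from "all bits known" and intersect across every feasible shift
+ // amount; only bits that agree for all of them stay known.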
+ for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
+ // Combine the shifted known input bits only for those shift amounts
+ // compatible with the known bits of the shift amount.
+ if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
+ continue;
+ if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
+ continue;
+ // If we know the shifter is nonzero, we may be able to infer more known
+ // bits. This check is sunk down as far as possible to avoid the expensive
+ // call to isKnownNonZero if the cheaper checks above fail.
+ if (ShiftAmt == 0) {
+ if (!ShifterOperandIsNonZero.hasValue())
+ ShifterOperandIsNonZero =
+ isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+ if (*ShifterOperandIsNonZero)
+ continue;
+ }
+
+ KnownZero &= KZF(KnownZero2, ShiftAmt);
+ KnownOne &= KOF(KnownOne2, ShiftAmt);
+ }
+
+ // If there are no compatible shift amounts, then we've proven that the shift
+ // amount must be >= the BitWidth, and the result is undefined. We could
+ // return anything we'd like, but we need to make sure the sets of known bits
+ // stay disjoint (it should be better for some other code to actually
+ // propagate the undef than to pick a value here using known bits).
+ if ((KnownZero & KnownOne) != 0)
+ KnownZero.clearAllBits(), KnownOne.clearAllBits();
+}
+
+static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
+ APInt &KnownOne, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
+ break;
+ case Instruction::And: {
+ // If either the LHS or the RHS is zero, the result is zero.
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+
+ // and(x, add (x, -1)) is a common idiom that always clears the low bit;
+ // here we handle the more general case of adding any odd number by
+ // matching the form add(x, add(x, y)) where y is odd.
+ // TODO: This could be generalized to clearing any bit set in y where the
+ // following bit is known to be unset in y.
+ Value *Y = nullptr;
+ if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
+ m_Value(Y))) ||
+ match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
+ m_Value(Y)))) {
+ APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0);
+ computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q);
+ if (KnownOne3.countTrailingOnes() > 0)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, 1);
+ }
+ break;
+ }
+ case Instruction::Or: {
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ }
+ case Instruction::Xor: {
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ break;
+ }
+ case Instruction::Mul: {
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
+ break;
+ }
+ case Instruction::UDiv: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
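+ // If the divisor has a known-one bit, its highest one is at index
+ // BitWidth - RHSUnknownLeadingOnes - 1, so the divisor is at least
+ // 2^(BitWidth - RHSUnknownLeadingOnes - 1) and the quotient gains that
+ // many leading zeros.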
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ break;
+ }
+ case Instruction::Select:
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ break; // Can't work with floating point.
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::AddrSpaceCast: // Pointers could be different sizes.
+ // FALL THROUGH and handle them the same as zext/trunc.
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ Type *SrcTy = I->getOperand(0)->getType();
+
+ unsigned SrcBitWidth;
+ // Note that we handle pointer operands here because of inttoptr/ptrtoint
+ // which fall through here.
+ SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType());
+
+ assert(SrcBitWidth && "SrcBitWidth can't be zero");
+ KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ KnownZero = KnownZero.zextOrTrunc(BitWidth);
+ KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ // Any top bits are known to be zero.
+ if (BitWidth > SrcBitWidth)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::BitCast: {
+ Type *SrcTy = I->getOperand(0)->getType();
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
+ SrcTy->isFloatingPointTy()) &&
+ // TODO: For now, not handling conversions like:
+ // (bitcast i64 %x to <2 x i32>)
+ !I->getType()->isVectorTy()) {
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ break;
+ }
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
+ KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::Shl: {
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return (KnownZero << ShiftAmt) |
+ APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne << ShiftAmt;
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
+ break;
+ }
+ case Instruction::LShr: {
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return APIntOps::lshr(KnownZero, ShiftAmt) |
+ // High bits known zero.
+ APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return APIntOps::lshr(KnownOne, ShiftAmt);
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
+ break;
+ }
+ case Instruction::AShr: {
+ // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+ return APIntOps::ashr(KnownZero, ShiftAmt);
+ };
+
+ auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+ return APIntOps::ashr(KnownOne, ShiftAmt);
+ };
+
+ computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, DL, Depth, Q,
+ KZF, KOF);
+ break;
+ }
+ case Instruction::Sub: {
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
+ break;
+ }
+ case Instruction::Add: {
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
+ break;
+ }
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1,
+ Q);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+
+ // The sign bit is the LHS's sign bit, except when the result of the
+ // remainder is zero.
+ if (KnownZero.isNonNegative()) {
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL,
+ Depth + 1, Q);
+ // If it's known zero, our sign bit is also zero.
+ if (LHSKnownZero.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ }
+
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
+ KnownZero |= ~LowBits;
+ KnownOne &= LowBits;
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ break;
+ }
+
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(I);
+ unsigned Align = AI->getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(AI->getType()->getElementType());
+
+ if (Align > 0)
+ KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ // Analyze all of the subscripts of this getelementptr instruction
+ // to determine if we can prove known low zero bits.
+ APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+ computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL,
+ Depth + 1, Q);
+ unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+ gep_type_iterator GTI = gep_type_begin(I);
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Index = I->getOperand(i);
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ // Handle struct member offset arithmetic.
+
+ // Handle case when index is vector zeroinitializer
+ Constant *CIndex = cast<Constant>(Index);
+ if (CIndex->isZeroValue())
+ continue;
+
+ if (CIndex->getType()->isVectorTy())
+ Index = CIndex->getSplatValue();
+
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ uint64_t Offset = SL->getElementOffset(Idx);
+ TrailZ = std::min<unsigned>(TrailZ,
+ countTrailingZeros(Offset));
+ } else {
+ // Handle array index arithmetic.
+ Type *IndexedTy = GTI.getIndexedType();
+ if (!IndexedTy->isSized()) {
+ TrailZ = 0;
+ break;
+ }
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
+ uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy);
+ LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1,
+ Q);
+ TrailZ = std::min(TrailZ,
+ unsigned(countTrailingZeros(TypeSize) +
+ LocalKnownZero.countTrailingOnes()));
+ }
+ }
+
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *P = cast<PHINode>(I);
+ // Handle the case of a simple two-predecessor recurrence PHI.
+ // There's a lot more that could theoretically be done here, but
+ // this is sufficient to catch some interesting cases.
+ if (P->getNumIncomingValues() == 2) {
+ for (unsigned i = 0; i != 2; ++i) {
+ Value *L = P->getIncomingValue(i);
+ Value *R = P->getIncomingValue(!i);
+ Operator *LU = dyn_cast<Operator>(L);
+ if (!LU)
+ continue;
+ unsigned Opcode = LU->getOpcode();
+ // Check for operations that have the property that if
+ // both their operands have low zero bits, the result
+ // will have low zero bits.
+ if (Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub ||
+ Opcode == Instruction::And ||
+ Opcode == Instruction::Or ||
+ Opcode == Instruction::Mul) {
+ Value *LL = LU->getOperand(0);
+ Value *LR = LU->getOperand(1);
+ // Find a recurrence.
+ if (LL == I)
+ L = LR;
+ else if (LR == I)
+ L = LL;
+ else
+ break;
+ // Ok, we have a PHI of the form L op= R. Check for low
+ // zero bits.
+ computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+ // We need to take the minimum number of known bits
+ APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+ computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q);
+
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+ break;
+ }
+ }
+ }
+
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ break;
+
+ // Otherwise take the intersection of the known bit sets of the operands,
+ // taking conservative care to avoid excessive recursion.
+ if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ // Skip if every incoming value refers back to the PHI itself.
+ if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
+ break;
+
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (Value *IncValue : P->incoming_values()) {
+ // Skip direct self references.
+ if (IncValue == P) continue;
+
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ // Recurse, but cap the recursion to one level, because we don't
+ // want to waste time spinning around in loops.
+ computeKnownBits(IncValue, KnownZero2, KnownOne2, DL,
+ MaxDepth - 1, Q);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ // If all bits have been ruled out, there's no need to check
+ // more operands.
+ if (!KnownZero && !KnownOne)
+ break;
+ }
+ }
+ break;
+ }
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
+ // If range metadata is attached to this IntrinsicInst, intersect the
+ // explicit range specified by the metadata and the implicit range of
+ // the intrinsic.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+ Depth + 1, Q);
+ KnownZero |= KnownZero2.byteSwap();
+ KnownOne |= KnownOne2.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ LowBits -= 1;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::ctpop: {
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL,
+ Depth + 1, Q);
+ // We can bound the space the count needs. Also, bits known to be zero
+ // can't contribute to the population.
+ unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
+ unsigned LeadingZeros =
+ APInt(BitWidth, BitsPossiblySet).countLeadingZeros();
+ assert(LeadingZeros <= BitWidth);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros);
+ KnownOne &= ~KnownZero;
+ // TODO: we could bound KnownOne using the lower bound on the number
+ // of bits which might be set provided by popcnt KnownOne2.
+ break;
+ }
+ case Intrinsic::fabs: {
+ Type *Ty = II->getType();
+ APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
+ KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
+ break;
+ }
+ case Intrinsic::x86_sse42_crc32_64_64:
+ KnownZero |= APInt::getHighBitsSet(64, 32);
+ break;
+ }
+ }
+ break;
+ case Instruction::ExtractValue:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
+ ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ if (EVI->getNumIndices() != 1) break;
+ if (EVI->getIndices()[0] == 0) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ computeKnownBitsAddSub(true, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ computeKnownBitsAddSub(false, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
+ break;
+ }
+ }
+ }
+ }
+}
+
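+/// Return the known alignment of the value V, or zero if unknown. This
+/// consults global object alignment, argument attributes (including sret),
+/// alloca alignment, call-site return attributes, and !align load metadata.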
+static unsigned getAlignment(const Value *V, const DataLayout &DL) {
+ unsigned Align = 0;
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ Align = GO->getAlignment();
+ if (Align == 0) {
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (GVar->isStrongDefinitionForLinker())
+ Align = DL.getPreferredAlignment(GVar);
+ else
+ Align = DL.getABITypeAlignment(ObjectType);
+ }
+ }
+ }
+ } else if (const Argument *A = dyn_cast<Argument>(V)) {
+ Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
+
+ if (!Align && A->hasStructRetAttr()) {
+ // An sret parameter has at least the ABI alignment of the return type.
+ Type *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ Align = DL.getABITypeAlignment(EltTy);
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ Align = AI->getAlignment();
+ else if (auto CS = ImmutableCallSite(V))
+ Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ Align = CI->getLimitedValue();
+ }
+
+ return Align;
+}
+
+/// Determine which bits of V are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bit sets.
+///
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// be optimized based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type, and vectors of integers. In the case where V is a vector, the known
+/// zero and known one values are the same width as the vector element, and the
+/// bit is set only if it is true
+/// for all of the elements in the vector.
+void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->isFPOrFPVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer, floating point, or pointer type!");
+ assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "V, KnownOne and KnownZero should have same BitWidth");
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue();
+ KnownZero = ~KnownOne;
+ return;
+ }
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ return;
+ }
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element. There is no real need to handle ConstantVector here, because
+ // we don't handle undef in any particularly useful way.
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ // We know that CDS must be a vector of integers. Take the intersection of
+ // each element.
+ KnownZero.setAllBits(); KnownOne.setAllBits();
+ APInt Elt(KnownZero.getBitWidth(), 0);
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ Elt = CDS->getElementAsInteger(i);
+ KnownZero &= ~Elt;
+ KnownOne &= Elt;
+ }
+ return;
+ }
+
+ // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+
+ // Limit search depth.
+ // All recursive calls that increase depth must come after this.
+ if (Depth == MaxDepth)
+ return;
+
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden())
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q);
+ return;
+ }
+
+ if (Operator *I = dyn_cast<Operator>(V))
+ computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Aligned pointers have trailing zeros - refine KnownZero set
+ if (V->getType()->isPointerTy()) {
+ unsigned Align = getAlignment(V, DL);
+ if (Align)
+ KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ }
+
+ // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
+ // strictly refine KnownZero and KnownOne. Therefore, we run them after
+ // computeKnownBitsFromOperator.
+
+ // Check whether a nearby assume intrinsic can determine some known bits.
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Check whether there's a dominating condition which implies something about
+ // this value at the given context.
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+ Q);
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+}
+
+/// Determine whether the sign bit is known to be zero or one.
+/// Convenience wrapper around computeKnownBits.
+void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
+ unsigned BitWidth = getBitWidth(V->getType(), DL);
+ if (!BitWidth) {
+ KnownZero = false;
+ KnownOne = false;
+ return;
+ }
+ APInt ZeroBits(BitWidth, 0);
+ APInt OneBits(BitWidth, 0);
+ computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q);
+ KnownOne = OneBits[BitWidth - 1];
+ KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
+bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ const Query &Q, const DataLayout &DL) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return OrZero;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+ }
+
+ // 1 << X is clearly a power of two if the one is not shifted off the end. If
+ // it is shifted off the end then the result is undefined.
+ if (match(V, m_Shl(m_One(), m_Value())))
+ return true;
+
+ // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+ // bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignBit(), m_Value())))
+ return true;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ Value *X = nullptr, *Y = nullptr;
+ // A shift left or a logical shift right of a power of two is a power of two
+ // or zero.
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
+ match(V, m_LShr(m_Value(X), m_Value()))))
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
+
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL);
+
+ if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
+ // A power of two and'd with anything is a power of two or zero.
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL))
+ return true;
+ // X & (-X) is always a power of two or zero.
+ if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
+ return true;
+ return false;
+ }
+
+ // Adding a power-of-two or zero to the same power-of-two or zero yields
+ // either the original power-of-two, a larger power-of-two or zero.
+ if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
+ if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
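+ // If X = Y & M and Y is a power of two or zero, then X is either 0 or Y,
+ // so X + Y is either Y or 2*Y: again a power of two or zero.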
+ if (match(X, m_And(m_Specific(Y), m_Value())) ||
+ match(X, m_And(m_Value(), m_Specific(Y))))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL))
+ return true;
+ if (match(Y, m_And(m_Specific(X), m_Value())) ||
+ match(Y, m_And(m_Value(), m_Specific(X))))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL))
+ return true;
+
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+ APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q);
+
+ APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q);
+ // If i8 V is a power of two or zero:
+ // ZeroBits: 1 1 1 0 1 1 1 1
+ // ~ZeroBits: 0 0 0 1 0 0 0 0
+ if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2())
+ // If OrZero isn't set, we cannot give back a zero result.
+ // Make sure either the LHS or RHS has a bit set.
+ if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue())
+ return true;
+ }
+ }
+
+ // An exact divide or right shift can only shift off zero bits, so the result
+ // is a power of two only if the first operand is a power of two and not
+ // copying a sign bit (sdiv int_min, 2).
+ if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
+ match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
+ return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
+ Depth, Q, DL);
+ }
+
+ return false;
+}
+
+/// \brief Test whether a GEP's result is known to be non-null.
+///
+/// Uses properties inherent in a GEP to try to determine whether it is known
+/// to be non-null.
+///
+/// Currently this routine does not support vector GEPs.
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
+ return false;
+
+ // FIXME: Support vector-GEPs.
+ assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
+
+ // If the base pointer is non-null, we cannot walk to a null address with an
+ // inbounds GEP in address space zero.
+ if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
+ return true;
+
+ // Walk the GEP operands and see if any operand introduces a non-zero offset.
+ // If so, then the GEP cannot produce a null pointer, as doing so would
+ // inherently violate the inbounds contract within address space zero.
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ // Struct types are easy -- they must always be indexed by a constant.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
+ if (ElementOffset > 0)
+ return true;
+ continue;
+ }
+
+ // If we have a zero-sized type, the index doesn't matter. Keep looping.
+ if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
+ continue;
+
+ // Fast path the constant operand case both for efficiency and so we don't
+ // increment Depth when just zipping down an all-constant GEP.
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
+ if (!OpC->isZero())
+ return true;
+ continue;
+ }
+
+ // We post-increment Depth here because while isKnownNonZero increments it
+ // as well, when we pop back up that increment won't persist. We don't want
+ // to recurse 10k times just because we have 10k GEP operands. We don't
+ // bail completely out because we want to handle constant GEPs regardless
+ // of depth.
+ if (Depth++ >= MaxDepth)
+ continue;
+
+ if (isKnownNonZero(GTI.getOperand(), DL, Depth, Q))
+ return true;
+ }
+
+ return false;
+}
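+
+// Illustrative IR sketch (exposition only; %s is a hypothetical value): for
+//   %f = getelementptr inbounds { i32, i32 }, { i32, i32 }* %s, i64 0, i32 1
+// the struct-index walk above finds a constant element offset of 4 bytes, so
+// an inbounds GEP in address space 0 cannot yield null even if %s is unknown.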
+
+/// Does the 'Range' metadata (which must be a valid MD_range operand list)
+/// ensure that the value it's attached to is never equal to 'Value'?
+static bool rangeMetadataExcludesValue(MDNode* Ranges,
+ const APInt& Value) {
+ const unsigned NumRanges = Ranges->getNumOperands() / 2;
+ assert(NumRanges >= 1);
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ if (Range.contains(Value))
+ return false;
+ }
+ return true;
+}
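+
+// For example (exposition only), a load annotated with
+//   !range !{i32 1, i32 256}
+// describes the half-open range [1, 256), which excludes zero, so
+// rangeMetadataExcludesValue(Ranges, 0) returns true for it.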
+
+/// Return true if the given value is known to be non-zero when defined.
+/// For vectors return true if every element is known to be non-zero when
+/// defined. Supports values with integer or pointer type and vectors of
+/// integers.
+bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+ // TODO: Handle vectors
+ return false;
+ }
+
+ if (Instruction* I = dyn_cast<Instruction>(V)) {
+ if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
+ // If the possible ranges don't contain zero, then the value is
+ // definitely non-zero.
+ if (IntegerType* Ty = dyn_cast<IntegerType>(V->getType())) {
+ const APInt ZeroValue(Ty->getBitWidth(), 0);
+ if (rangeMetadataExcludesValue(Ranges, ZeroValue))
+ return true;
+ }
+ }
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ >= MaxDepth)
+ return false;
+
+ // Check for pointer simplifications.
+ if (V->getType()->isPointerTy()) {
+ if (isKnownNonNull(V))
+ return true;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (isGEPKnownNonNull(GEP, DL, Depth, Q))
+ return true;
+ }
+
+ unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL);
+
+ // X | Y != 0 if X != 0 or Y != 0.
+ Value *X = nullptr, *Y = nullptr;
+ if (match(V, m_Or(m_Value(X), m_Value(Y))))
+ return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q);
+
+ // ext X != 0 if X != 0.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ // shl nuw can't remove any non-zero bits.
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ if (BO->hasNoUnsignedWrap())
+ return isKnownNonZero(X, DL, Depth, Q);
+
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
+ if (KnownOne[0])
+ return true;
+ }
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+ // shr exact can only shift out zero bits.
+ PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
+ if (BO->isExact())
+ return isKnownNonZero(X, DL, Depth, Q);
+
+ bool XKnownNonNegative, XKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
+ if (XKnownNegative)
+ return true;
+
+ // If the shifter operand is a constant, and all of the bits shifted
+ // out are known to be zero, and X is known non-zero then at least one
+ // non-zero bit must remain.
+ if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
+
+ auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
+ // Is there a known one in the portion not shifted out?
+ if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal)
+ return true;
+ // Are all the bits to be shifted out known zero?
+ if (KnownZero.countTrailingOnes() >= ShiftVal)
+ return isKnownNonZero(X, DL, Depth, Q);
+ }
+ }
+ // div exact can only produce a zero if the dividend is zero.
+ else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
+ return isKnownNonZero(X, DL, Depth, Q);
+ }
+ // X + Y.
+ else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ bool YKnownNonNegative, YKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnownNonNegative && YKnownNonNegative)
+ if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q))
+ return true;
+
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (BitWidth && XKnownNegative && YKnownNegative) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnownNonNegative &&
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL))
+ return true;
+ if (YKnownNonNegative &&
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL))
+ return true;
+ }
+ // X * Y.
+ else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ // If X and Y are non-zero then so is X * Y as long as the multiplication
+ // does not overflow.
+ if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q))
+ return true;
+ }
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
+ else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) &&
+ isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
+ return true;
+ }
+ // PHI
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Try and detect a recurrence that monotonically increases from a
+ // starting value, as these are common as induction variables.
+ if (PN->getNumIncomingValues() == 2) {
+ Value *Start = PN->getIncomingValue(0);
+ Value *Induction = PN->getIncomingValue(1);
+ if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
+ std::swap(Start, Induction);
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
+ if (!C->isZero() && !C->isNegative()) {
+ ConstantInt *X;
+ if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+ match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
+ !X->isNegative())
+ return true;
+ }
+ }
+ }
+ }
+
+ if (!BitWidth) return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
+ return KnownOne != 0;
+}
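+
+// Illustrative IR sketch (exposition only; hypothetical values) for the
+// recurrence case above:
+//   %i = phi i32 [ 1, %entry ], [ %i.next, %loop ]
+//   %i.next = add nuw i32 %i, 1
+// The start value is a positive constant and the step is a non-negative
+// constant added without unsigned wrap, so %i can never reach zero.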
+
+/// Return true if V2 == V1 + X, where X is known non-zero.
+static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return false;
+ Value *Op = nullptr;
+ if (V2 == BO->getOperand(0))
+ Op = BO->getOperand(1);
+ else if (V2 == BO->getOperand(1))
+ Op = BO->getOperand(0);
+ else
+ return false;
+ return isKnownNonZero(Op, DL, 0, Q);
+}
+
+/// Return true if it is known that V1 != V2.
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ const Query &Q) {
+ if (V1->getType()->isVectorTy() || V1 == V2)
+ return false;
+ if (V1->getType() != V2->getType())
+ // We can't look through casts yet.
+ return false;
+ if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q))
+ return true;
+
+ if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+ // Are any known bits in V1 contradictory to known bits in V2? If V1
+ // has a known zero where V2 has a known one, they must not be equal.
+ auto BitWidth = Ty->getBitWidth();
+ APInt KnownZero1(BitWidth, 0);
+ APInt KnownOne1(BitWidth, 0);
+ computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q);
+ APInt KnownZero2(BitWidth, 0);
+ APInt KnownOne2(BitWidth, 0);
+ computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q);
+
+ auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
+ if (OppositeBits.getBoolValue())
+ return true;
+ }
+ return false;
+}
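+
+// Illustrative sketch (exposition only; %x and %y are hypothetical): with
+//   %a = or i32 %x, 1         ; bit 0 known one
+//   %b = shl i32 %y, 1        ; bit 0 known zero
+// the known-bits comparison above sees contradictory values for bit 0, so
+// isKnownNonEqual(%a, %b) returns true.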
+
+/// Return true if 'V & Mask' is known to be zero, i.e. every bit set in Mask
+/// is a bit that V is known not to have set. We use this predicate to
+/// simplify operations downstream.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type, and vectors of integers. In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
+ return (KnownZero & Mask) == Mask;
+}
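+
+// For example (exposition only): if V is 'and i32 %x, 255', then bits 8..31
+// of V are known zero, and MaskedValueIsZero(V, 0xFFFFFF00) returns true.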
+
+/// Return the number of times the sign bit of the register is replicated into
+/// the other bits. We know that at least 1 bit is always equal to the sign bit
+/// (itself), but other cases can give us information. For example, immediately
+/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
+/// other, so we return 3.
+///
+/// 'V' must have a scalar integer type.
+///
+unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType());
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ // Note that ConstantInt is handled by the general computeKnownBits case
+ // below.
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ Operator *U = dyn_cast<Operator>(V);
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp;
+
+ case Instruction::SDiv: {
+ const APInt *Denominator;
+ // sdiv X, C -> adds log(C) sign bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits.
+ unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Add floor(log(C)) bits to the numerator bits.
+ return std::min(TyBits, NumBits + Denominator->logBase2());
+ }
+ break;
+ }
+
+ case Instruction::SRem: {
+ const APInt *Denominator;
+ // srem X, C -> we know that the result is within [-C+1,C) when C is a
+    // positive constant. This lets us put a lower bound on the number of sign
+ // bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits. SRem by a positive constant
+ // can't lower the number of sign bits.
+ unsigned NumrBits =
+ ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Calculate the leading sign bit constraints by examining the
+ // denominator. Given that the denominator is positive, there are two
+ // cases:
+ //
+      // 1. the numerator is positive. The result range is [0,C), and every
+      // value in [0,C) is u< (1 << ceilLogBase2(C)).
+      //
+      // 2. the numerator is negative. The result range is (-C,0], and every
+      // value in (-C,0] is either 0 or u> (-1 << ceilLogBase2(C)).
+ //
+ // Thus a lower bound on the number of sign bits is `TyBits -
+ // ceilLogBase2(C)`.
+
+ unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+ return std::max(NumrBits, ResBits);
+ }
+ break;
+ }
+
+ case Instruction::AShr: {
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+ // ashr X, C -> adds C sign bits. Vectors too.
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ Tmp += ShAmt->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ return Tmp;
+ }
+ case Instruction::Shl: {
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+ Tmp2 = ShAmt->getZExtValue();
+ if (Tmp2 >= TyBits || // Bad shift.
+ Tmp2 >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - Tmp2;
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // computeKnownBits, and pick whichever answer is better.
+ }
+ break;
+
+ case Instruction::Select:
+ Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q);
+ return std::min(Tmp, Tmp2);
+
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(U);
+ unsigned NumIncomingValues = PN->getNumIncomingValues();
+ // Don't analyze large in-degree PHIs.
+ if (NumIncomingValues > 4) break;
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (NumIncomingValues == 0) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q);
+ for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q));
+ }
+ return Tmp;
+ }
+
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+ APInt Mask;
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-TyBits;
+  // Return # leading zeros. We use 'min' here in case Mask was zero before
+  // shifting. We don't want to return '64' for an i32 "0".
+ return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
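+
+// Illustrative sketch (exposition only; %x is hypothetical): for
+//   %s = ashr i32 %x, 24
+// the AShr case above reports at least 1 + 24 = 25 sign bits, i.e. the top
+// 25 bits of %s are all copies of its sign bit.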
+
+/// This function computes the integer multiple of Base that equals V. If
+/// successful, it returns true and writes the multiple to Multiple; otherwise
+/// it returns false. It looks through SExt instructions only if
+/// LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+  assert(V->getType()->isIntegerTy() && "Not integer type!");
+
+ Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt:
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ case Instruction::ZExt:
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ APInt API(Op1Int.getBitWidth(), 0);
+ API.setBit(BitToSet);
+ Op1 = ConstantInt::get(V->getContext(), API);
+ }
+
+ Value *Mul0 = nullptr;
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ Value *Mul1 = nullptr;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
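+
+// Example uses (exposition only, with hypothetical values):
+//   ComputeMultiple(i32 12, /*Base=*/4, M) sets M to i32 3 and returns true;
+//   ComputeMultiple('mul i32 %x, 3', /*Base=*/3, M) sets M to %x, because the
+//   constant operand divides evenly by Base with multiplier 1.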
+
+/// Return true if we can prove that the specified FP value is never equal to
+/// -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegZero();
+
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
+ if (Depth == 6)
+ return false; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ // Check if the nsz fast-math flag is set
+ if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
+ if (FPO->hasNoSignedZeros())
+ return true;
+
+ // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if (I->getOpcode() == Instruction::FAdd)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
+ if (CFP->isNullValue())
+ return true;
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+ return true;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ // sqrt(-0.0) = -0.0, no other negative results are possible.
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const Function *F = CI->getCalledFunction()) {
+ if (F->isDeclaration()) {
+ // abs(x) != -0.0
+ if (F->getName() == "abs") return true;
+ // fabs[lf](x) != -0.0
+ if (F->getName() == "fabs") return true;
+ if (F->getName() == "fabsf") return true;
+ if (F->getName() == "fabsl") return true;
+ if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+ F->getName() == "sqrtl")
+ return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+ }
+ }
+
+ return false;
+}
+
+bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeNegativeZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
+ if (Depth == 6)
+ return false; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::FMul:
+ // x*x is always non-negative or a NaN.
+ if (I->getOperand(0) == I->getOperand(1))
+ return true;
+ // Fall through
+ case Instruction::FAdd:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ // Widening/narrowing never change sign.
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt:
+ return true;
+ case Intrinsic::powi:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // powi(x,n) is non-negative if n is even.
+ if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+ return true;
+ }
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ // x*x+y is non-negative if y is non-negative.
+ return I->getOperand(0) == I->getOperand(1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
+ }
+ break;
+ }
+ return false;
+}
+
+/// If the specified value can be set by repeating the same byte in memory,
+/// return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+  // Handle 'null' ConstantAggregateZero etc.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() % 8 == 0) {
+ assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
+
+ if (!CI->getValue().isSplat(8))
+ return nullptr;
+ return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
+ }
+ }
+
+ // A ConstantDataArray/Vector is splatable if all its members are equal and
+ // also splatable.
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
+ Value *Elt = CA->getElementAsConstant(0);
+ Value *Val = isBytewiseValue(Elt);
+ if (!Val)
+ return nullptr;
+
+ for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
+ if (CA->getElementAsConstant(I) != Elt)
+ return nullptr;
+
+ return Val;
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return nullptr;
+}
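+
+// Example results (exposition only): i32 0xABABABAB yields i8 0xAB, i16 0
+// yields i8 0, and i16 0x1234 yields null because its two bytes differ.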
+
+
+// This is the recursive version of BuildSubAggregate. Idxs is the index within
+// the nested struct From that we are looking at now (which is of type
+// IndexedType). IdxSkip is the number of indices from Idxs that should be left
+// out when inserting into the resulting struct. To is the result struct built
+// so far, which new insertvalue instructions build on.
+static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
+ SmallVectorImpl<unsigned> &Idxs,
+ unsigned IdxSkip,
+ Instruction *InsertBefore) {
+ llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
+ if (STy) {
+ // Save the original To argument so we can modify it
+ Value *OrigTo = To;
+ // General case, the type indexed by Idxs is a struct
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // Process each struct element recursively
+ Idxs.push_back(i);
+ Value *PrevTo = To;
+ To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+ InsertBefore);
+ Idxs.pop_back();
+ if (!To) {
+ // Couldn't find any inserted value for this index? Cleanup
+ while (PrevTo != OrigTo) {
+ InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+ PrevTo = Del->getAggregateOperand();
+ Del->eraseFromParent();
+ }
+ // Stop processing elements
+ break;
+ }
+ }
+ // If we successfully found a value for each of our subaggregates
+ if (To)
+ return To;
+ }
+  // Base case, the type indexed by Idxs is not a struct, or not all of
+ // the struct's elements had a value that was inserted directly. In the latter
+ // case, perhaps we can't determine each of the subelements individually, but
+ // we might be able to find the complete struct somewhere.
+
+ // Find the value that is at that particular spot
+ Value *V = FindInsertedValue(From, Idxs);
+
+ if (!V)
+ return nullptr;
+
+  // Insert the value in the new (sub) aggregate
+ return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
+ "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (ie, inserted into From by an
+// insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ assert(InsertBefore && "Must have someplace to insert!");
+ Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+ idx_range);
+ Value *To = UndefValue::get(IndexedType);
+ SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
+ unsigned IdxSkip = Idxs.size();
+
+ return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// Given an aggregate and a sequence of indices, see if the scalar value
+/// indexed is already around as a register, for example if it was inserted
+/// directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
+ // Nothing to index? Just return V then (this is useful at the end of our
+ // recursion).
+ if (idx_range.empty())
+ return V;
+ // We have indices, so V should have an indexable type.
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
+ "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
+ "Invalid indices for type?");
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = C->getAggregateElement(idx_range[0]);
+ if (!C) return nullptr;
+ return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
+ }
+
+ if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // Loop the indices for the insertvalue instruction in parallel with the
+ // requested indices
+ const unsigned *req_idx = idx_range.begin();
+ for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+ i != e; ++i, ++req_idx) {
+ if (req_idx == idx_range.end()) {
+ // We can't handle this without inserting insertvalues
+ if (!InsertBefore)
+ return nullptr;
+
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
+ InsertBefore);
+ }
+
+      // This insertvalue inserts something other than what we are looking for.
+ // See if the (aggregate) value inserted into has the value we are
+ // looking for, then.
+ if (*req_idx != *i)
+ return FindInsertedValue(I->getAggregateOperand(), idx_range,
+ InsertBefore);
+ }
+ // If we end up here, the indices of the insertvalue match with those
+ // requested (though possibly only partially). Now we recursively look at
+ // the inserted value, passing any remaining indices.
+ return FindInsertedValue(I->getInsertedValueOperand(),
+ makeArrayRef(req_idx, idx_range.end()),
+ InsertBefore);
+ }
+
+ if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+ // If we're extracting a value from an aggregate that was extracted from
+ // something else, we can extract from that something else directly instead.
+ // However, we will need to chain I's indices with the requested indices.
+
+ // Calculate the number of indices required
+ unsigned size = I->getNumIndices() + idx_range.size();
+ // Allocate some space to put the new indices in
+ SmallVector<unsigned, 5> Idxs;
+ Idxs.reserve(size);
+ // Add indices from the extract value instruction
+ Idxs.append(I->idx_begin(), I->idx_end());
+
+ // Add requested indices
+ Idxs.append(idx_range.begin(), idx_range.end());
+
+ assert(Idxs.size() == size
+ && "Number of indices added not correct?");
+
+ return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
+ }
+  // Otherwise, we don't know (e.g. when extracting from a function return
+  // value or a load instruction).
+ return nullptr;
+}
+
+/// Analyze the specified pointer to see if it can be expressed as a base
+/// pointer plus a constant offset. Return the base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const DataLayout &DL) {
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
+ APInt ByteOffset(BitWidth, 0);
+
+ // We walk up the defs but use a visited set to handle unreachable code. In
+ // that case, we stop after accumulating the cycle once (not that it
+ // matters).
+ SmallPtrSet<Value *, 16> Visited;
+ while (Visited.insert(Ptr).second) {
+ if (Ptr->getType()->isVectorTy())
+ break;
+
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+ break;
+
+ ByteOffset += GEPOffset;
+
+ Ptr = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
+ Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
+ Ptr = cast<Operator>(Ptr)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
+ if (GA->mayBeOverridden())
+ break;
+ Ptr = GA->getAliasee();
+ } else {
+ break;
+ }
+ }
+ Offset = ByteOffset.getSExtValue();
+ return Ptr;
+}
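+
+// Illustrative sketch (exposition only; %p is hypothetical): for
+//   %q = getelementptr inbounds i8, i8* %p, i64 16
+// GetPointerBaseWithConstantOffset(%q, Offset, DL) returns %p with Offset set
+// to 16; a bitcast or addrspacecast in between would be looked through too.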
+
+
+/// This function extracts the constant string pointed to by V, starting at
+/// byte Offset. If successful, it returns true and writes the string to Str;
+/// if unsuccessful, it returns false.
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset, bool TrimAtNul) {
+ assert(V);
+
+ // Look through bitcast instructions and geps.
+ V = V->stripPointerCasts();
+
+ // If the value is a GEP instruction or constant expression, treat it as an
+ // offset.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return false;
+
+    // Make sure the GEP's base operand is a pointer to an array of i8.
+ PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+ ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+ if (!AT || !AT->getElementType()->isIntegerTy(8))
+ return false;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (!FirstIdx || !FirstIdx->isZero())
+ return false;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return false;
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
+ TrimAtNul);
+ }
+
+  // Whether we looked through a GEP or not, the value must now reference a
+  // global variable that is a constant with a definitive initializer. The
+  // referenced constant initializer is the array that we'll use for the
+  // optimization.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+
+ // Handle the all-zeros case
+ if (GV->getInitializer()->isNullValue()) {
+ // This is a degenerate case. The initializer is constant zero so the
+ // length of the string must be zero.
+ Str = "";
+ return true;
+ }
+
+  // Must be a ConstantDataArray
+ const ConstantDataArray *Array =
+ dyn_cast<ConstantDataArray>(GV->getInitializer());
+ if (!Array || !Array->isString())
+ return false;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getArrayNumElements();
+
+ // Start out with the entire array in the StringRef.
+ Str = Array->getAsString();
+
+ if (Offset > NumElts)
+ return false;
+
+ // Skip over 'offset' bytes.
+ Str = Str.substr(Offset);
+
+ if (TrimAtNul) {
+ // Trim off the \0 and anything after it. If the array is not nul
+    // terminated, we just return the rest of the string. The client may know
+ // some other way that the string is length-bound.
+ Str = Str.substr(0, Str.find('\0'));
+ }
+ return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
+ // Look through noop bitcast instructions.
+ V = V->stripPointerCasts();
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN).second)
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (Value *IncValue : PN->incoming_values()) {
+ uint64_t Len = GetStringLengthH(IncValue, PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+  // strlen(select(c,x,y)) -> strlen(x) if it agrees with strlen(y), else unknown.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+ // Otherwise, see if we can read the string.
+ StringRef StrData;
+ if (!getConstantStringInfo(V, StrData))
+ return 0;
+
+ return StrData.size()+1;
+}
+
+/// If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so treat
+  // the string as empty and return 1 (i.e. len("") + 1).
+ return Len == ~0ULL ? 1 : Len;
+}
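+
+// Example (exposition only; @s is a hypothetical global): for
+//   @s = private constant [6 x i8] c"hello\00"
+// getConstantStringInfo returns Str == "hello" (with TrimAtNul), and
+// GetStringLength on a pointer to @s returns 6, i.e. strlen("hello") + 1.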
+
+/// \brief \p PN defines a loop-variant pointer to an object. Check if the
+/// previous iteration of the loop was referring to the same object as \p PN.
+static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+ // Find the loop-defined value.
+ Loop *L = LI->getLoopFor(PN->getParent());
+ if (PN->getNumIncomingValues() != 2)
+ return true;
+
+ // Find the value from previous iteration.
+ auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ return true;
+
+ // If a new pointer is loaded in the loop, the pointer references a different
+ // object in every iteration. E.g.:
+ // for (i)
+ // int *p = a[i];
+ // ...
+ if (auto *Load = dyn_cast<LoadInst>(PrevValue))
+ if (!L->isLoopInvariant(Load->getPointerOperand()))
+ return false;
+ return true;
+}
+
+Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
+ unsigned MaxLookup) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+ Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ // See if InstructionSimplify knows any relevant tricks.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Acquire a DominatorTree and AssumptionCache and use them.
+ if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
+
+void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL, LoopInfo *LI,
+ unsigned MaxLookup) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = GetUnderlyingObject(P, DL, MaxLookup);
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ // If this PHI changes the underlying object in every iteration of the
+ // loop, don't look through it. Consider:
+ // int **A;
+ // for (i) {
+ // Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ // Curr = A[i];
+ // *Prev, *Curr;
+ //
+ // Prev is tracking Curr one iteration behind so they refer to different
+ // underlying objects.
+ if (!LI || !LI->isLoopHeader(PN->getParent()) ||
+ isSameUnderlyingObjectInLoop(PN, LI))
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
+ continue;
+ }
+
+ Objects.push_back(P);
+ } while (!Worklist.empty());
+}
+
+/// Return true if the only users of this pointer are lifetime markers.
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+ for (const User *U : V->users()) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ if (!II) return false;
+
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ }
+ return true;
+}
+
+static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset,
+ Type *Ty, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ assert(Offset.isNonNegative() && "offset can't be negative");
+ assert(Ty->isSized() && "must be sized");
+
+ APInt DerefBytes(Offset.getBitWidth(), 0);
+ bool CheckForNonNull = false;
+ if (const Argument *A = dyn_cast<Argument>(BV)) {
+ DerefBytes = A->getDereferenceableBytes();
+ if (!DerefBytes.getBoolValue()) {
+ DerefBytes = A->getDereferenceableOrNullBytes();
+ CheckForNonNull = true;
+ }
+ } else if (auto CS = ImmutableCallSite(BV)) {
+ DerefBytes = CS.getDereferenceableBytes(0);
+ if (!DerefBytes.getBoolValue()) {
+ DerefBytes = CS.getDereferenceableOrNullBytes(0);
+ CheckForNonNull = true;
+ }
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) {
+ if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ if (!DerefBytes.getBoolValue()) {
+ if (MDNode *MD =
+ LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ CheckForNonNull = true;
+ }
+ }
+
+ if (DerefBytes.getBoolValue())
+ if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
+ if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI))
+ return true;
+
+ return false;
+}
+
+static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ Type *VTy = V->getType();
+ Type *Ty = VTy->getPointerElementType();
+ if (!Ty->isSized())
+ return false;
+
+ APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+ return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
+}
+
+static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
+ const DataLayout &DL) {
+ APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL));
+
+ if (!BaseAlign) {
+ Type *Ty = Base->getType()->getPointerElementType();
+ if (!Ty->isSized())
+ return false;
+ BaseAlign = DL.getABITypeAlignment(Ty);
+ }
+
+ APInt Alignment(Offset.getBitWidth(), Align);
+
+ assert(Alignment.isPowerOf2() && "must be a power of 2!");
+ return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+}
+
+static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
+ Type *Ty = Base->getType();
+ assert(Ty->isSized() && "must be sized");
+ APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+ return isAligned(Base, Offset, Align, DL);
+}
+
+/// Test if V is always a pointer to allocated and suitably aligned memory for
+/// a simple load or store.
+static bool isDereferenceableAndAlignedPointer(
+ const Value *V, unsigned Align, const DataLayout &DL,
+ const Instruction *CtxI, const DominatorTree *DT,
+ const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+
+ // These are obviously ok if aligned.
+ if (isa<AllocaInst>(V))
+ return isAligned(V, Align, DL);
+
+ // It's not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. However,
+  // if we're casting from a pointer to a type of larger size to a
+  // pointer to a type of smaller size (or the same size), and the alignment
+ // is at least as large as for the resulting pointer type, then
+ // we can look through the bitcast.
+ if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
+ Type *STy = BC->getSrcTy()->getPointerElementType(),
+ *DTy = BC->getDestTy()->getPointerElementType();
+ if (STy->isSized() && DTy->isSized() &&
+ (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
+ (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
+ return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL,
+ CtxI, DT, TLI, Visited);
+ }
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->hasExternalWeakLinkage())
+ return isAligned(V, Align, DL);
+
+ // byval arguments are okay.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr())
+ return isAligned(V, Align, DL);
+
+ if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
+ return isAligned(V, Align, DL);
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ Type *VTy = GEP->getType();
+ Type *Ty = VTy->getPointerElementType();
+ const Value *Base = GEP->getPointerOperand();
+
+ // Conservatively require that the base pointer be fully dereferenceable
+ // and aligned.
+ if (!Visited.insert(Base).second)
+ return false;
+ if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI,
+ Visited))
+ return false;
+
+ APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return false;
+
+ // Check if the load is within the bounds of the underlying object
+ // and offset is aligned.
+ uint64_t LoadSize = DL.getTypeStoreSize(Ty);
+ Type *BaseType = Base->getType()->getPointerElementType();
+ assert(isPowerOf2_32(Align) && "must be a power of 2!");
+ return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) &&
+ !(Offset & APInt(Offset.getBitWidth(), Align-1));
+ }
+
+ // For gc.relocate, look through relocations
+ if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
+ return isDereferenceableAndAlignedPointer(
+ RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
+
+ if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
+ return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
+ CtxI, DT, TLI, Visited);
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+ const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ // When dereferenceability information is provided by a dereferenceable
+ // attribute, we know exactly how many bytes are dereferenceable. If we can
+ // determine the exact offset to the attributed variable, we can use that
+ // information here.
+ Type *VTy = V->getType();
+ Type *Ty = VTy->getPointerElementType();
+
+ // Require ABI alignment for loads without alignment specification
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(Ty);
+
+ if (Ty->isSized()) {
+ APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+ const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ if (Offset.isNonNegative())
+ if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) &&
+ isAligned(BV, Offset, Align, DL))
+ return true;
+ }
+
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI,
+ Visited);
+}
+
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI);
+}
+
+bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ const Operator *Inst = dyn_cast<Operator>(V);
+ if (!Inst)
+ return false;
+
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (Inst->getOpcode()) {
+ default:
+ return true;
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // x / y is undefined if y == 0.
+ const APInt *V;
+ if (match(Inst->getOperand(1), m_APInt(V)))
+ return *V != 0;
+ return false;
+ }
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ // x / y is undefined if y == 0 or x == INT_MIN and y == -1
+ const APInt *Numerator, *Denominator;
+ if (!match(Inst->getOperand(1), m_APInt(Denominator)))
+ return false;
+ // We cannot hoist this division if the denominator is 0.
+ if (*Denominator == 0)
+ return false;
+ // It's safe to hoist if the denominator is not 0 or -1.
+ if (*Denominator != -1)
+ return true;
+ // At this point we know that the denominator is -1. It is safe to hoist as
+    // long as we know that the numerator is not INT_MIN.
+ if (match(Inst->getOperand(0), m_APInt(Numerator)))
+ return !Numerator->isMinSignedValue();
+ // The numerator *might* be MinSignedValue.
+ return false;
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(Inst);
+ if (!LI->isUnordered() ||
+ // Speculative load may create a race that did not exist in the source.
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeThread) ||
+ // Speculative load may load data from dirty regions.
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
+ return false;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return isDereferenceableAndAlignedPointer(
+ LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI);
+ }
+ case Instruction::Call: {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ // These synthetic intrinsics have no side-effects and just mark
+ // information about their operands.
+ // FIXME: There are other no-op synthetic instructions that potentially
+ // should be considered at least *safe* to speculate...
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+
+ case Intrinsic::bswap:
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz:
+ case Intrinsic::objectsize:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ return true;
+ // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set
+ // errno like libm sqrt would.
+ case Intrinsic::sqrt:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ return true;
+ // TODO: some fp intrinsics are marked as having the same error handling
+ // as libm. They're safe to speculate when they won't error.
+ // TODO: are convert_{from,to}_fp16 safe?
+ // TODO: can we list target-specific intrinsics here?
+ default: break;
+ }
+ }
+ return false; // The called function could have undefined behavior or
+ // side-effects, even if marked readnone nounwind.
+ }
+ case Instruction::VAArg:
+ case Instruction::Alloca:
+ case Instruction::Invoke:
+ case Instruction::PHI:
+ case Instruction::Store:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::IndirectBr:
+ case Instruction::Switch:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::LandingPad:
+ case Instruction::Resume:
+ case Instruction::CatchSwitch:
+ case Instruction::CatchPad:
+ case Instruction::CatchRet:
+ case Instruction::CleanupPad:
+ case Instruction::CleanupRet:
+ return false; // Misc instructions which have effects
+ }
+}
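+
+// A minimal usage sketch (illustrative; 'I' and 'InsertPt' are hypothetical
+// names, not defined in this file): a hoisting transform would typically
+// guard speculative code motion on this query, e.g.
+//
+//   if (isSafeToSpeculativelyExecute(&I, InsertPt, DT, TLI))
+//     I.moveBefore(InsertPt); // hoist only when no UB can be introduced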
+
+bool llvm::mayBeMemoryDependent(const Instruction &I) {
+ return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
+}
+
+/// Return true if we know that the specified value is never null.
+bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval, inalloca, or nonnull argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
+
+ // A global variable in address space 0 is non-null unless extern weak.
+ // Other address spaces may have null as a valid address for a global,
+ // so we can't assume anything.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage() &&
+ GV->getType()->getAddressSpace() == 0;
+
+ // A load tagged with nonnull metadata is never null.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ return LI->getMetadata(LLVMContext::MD_nonnull);
+
+ if (auto CS = ImmutableCallSite(V))
+ if (CS.isReturnNonNull())
+ return true;
+
+ return false;
+}
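+
+// For illustration, each of these IR values satisfies one of the cases above
+// (a sketch, not an exhaustive list):
+//
+//   %a = alloca i32                        ; alloca is never null
+//   define void @f(i32* nonnull %p)        ; nonnull argument
+//   %v = load i32*, i32** %q, !nonnull !0  ; load tagged with !nonnull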
+
+static bool isKnownNonNullFromDominatingCondition(const Value *V,
+ const Instruction *CtxI,
+ const DominatorTree *DT) {
+ assert(V->getType()->isPointerTy() && "V must be pointer type");
+
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ const ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ const BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI)
+ continue;
+
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor = nullptr;
+ CmpInst::Predicate Pred;
+
+ if (match(const_cast<ICmpInst*>(Cmp),
+ m_c_ICmp(Pred, m_Specific(V), m_Zero()))) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ NonNullSuccessor = BI->getSuccessor(1);
+ else if (Pred == ICmpInst::ICMP_NE)
+ NonNullSuccessor = BI->getSuccessor(0);
+ }
+
+ if (NonNullSuccessor) {
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
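+
+// The pattern recognized above corresponds to IR of this shape (an
+// illustrative sketch):
+//
+//   %cmp = icmp ne i32* %p, null
+//   br i1 %cmp, label %is.nonnull, label %is.null
+//
+// Inside %is.nonnull, %p is known non-null for a context instruction CtxI
+// whenever the edge into %is.nonnull dominates CtxI's block.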
+
+bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
+ const DominatorTree *DT, const TargetLibraryInfo *TLI) {
+ if (isKnownNonNull(V, TLI))
+ return true;
+
+ return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false;
+}
+
+OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
+ // This means if we have enough leading zero bits in the operands
+ // we can guarantee that the result does not overflow.
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+ DT);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+ DT);
+ // Note that underestimating the number of zero bits gives a more
+ // conservative answer.
+ unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
+ RHSKnownZero.countLeadingOnes();
+ // First handle the easy case: if we have enough zero bits there's
+ // definitely no overflow.
+ if (ZeroBits >= BitWidth)
+ return OverflowResult::NeverOverflows;
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // We know the multiply operation doesn't overflow if the maximum values for
+ // each operand will not overflow after we multiply them together.
+ bool MaxOverflow;
+ LHSMax.umul_ov(RHSMax, MaxOverflow);
+ if (!MaxOverflow)
+ return OverflowResult::NeverOverflows;
+
+ // We know it always overflows if multiplying the smallest possible values for
+ // the operands also results in overflow.
+ bool MinOverflow;
+ LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+ if (MinOverflow)
+ return OverflowResult::AlwaysOverflows;
+
+ return OverflowResult::MayOverflow;
+}
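+
+// A worked example of the leading-zero argument (illustrative numbers): for a
+// 32-bit multiply where each operand is known to be below 2^12 (so each has
+// at least 20 known-zero leading bits), ZeroBits is 40 >= 32; the product
+// fits in 24 bits, and NeverOverflows is returned before the max-value check.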
+
+OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+ if (LHSKnownNonNegative || LHSKnownNegative) {
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both operands: this addition MUST overflow.
+ return OverflowResult::AlwaysOverflows;
+ }
+
+ if (LHSKnownNonNegative && RHSKnownNonNegative) {
+ // The sign bit is clear in both operands: this addition CANNOT overflow.
+ return OverflowResult::NeverOverflows;
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
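+
+// The intuition, on a small example: in 8 bits, two operands with the sign
+// bit set are each >= 128, so their sum is >= 256 and must wrap; two operands
+// with the sign bit clear are each <= 127, so their sum is <= 254 and cannot
+// wrap.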
+
+static OverflowResult computeOverflowForSignedAdd(
+ Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+ if (Add && Add->hasNoSignedWrap()) {
+ return OverflowResult::NeverOverflows;
+ }
+
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+
+ if ((LHSKnownNonNegative && RHSKnownNegative) ||
+ (LHSKnownNegative && RHSKnownNonNegative)) {
+ // The sign bits are opposite: this CANNOT overflow.
+ return OverflowResult::NeverOverflows;
+ }
+
+ // The remaining code needs Add to be available. Return early if it is not.
+ if (!Add)
+ return OverflowResult::MayOverflow;
+
+ // If the sign of Add is the same as at least one of the operands, this add
+ // CANNOT overflow. This is particularly useful when the sum is
+ // @llvm.assume'ed non-negative rather than proved so from analyzing its
+ // operands.
+ bool LHSOrRHSKnownNonNegative =
+ (LHSKnownNonNegative || RHSKnownNonNegative);
+ bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+ if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
+ bool AddKnownNonNegative, AddKnownNegative;
+ ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
+ /*Depth=*/0, AC, CxtI, DT);
+ if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
+ (AddKnownNegative && LHSOrRHSKnownNegative)) {
+ return OverflowResult::NeverOverflows;
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
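+
+// Example of the last rule (an illustrative sketch): if %a is known
+// non-negative and "%s = add i32 %a, %b" is known non-negative (say via
+// @llvm.assume on a dominating condition), the add cannot overflow: with one
+// operand non-negative, the only possible overflow is positive overflow,
+// which would make the result negative.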
+
+OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
+ Add, DL, AC, CxtI, DT);
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
+}
+
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
+ // FIXME: This conservative implementation can be relaxed. E.g. most
+ // atomic operations are guaranteed to terminate on most platforms
+ // and most functions terminate.
+
+ return !I->isAtomic() && // atomics may never succeed on some platforms
+ !isa<CallInst>(I) && // could throw and might not terminate
+ !isa<InvokeInst>(I) && // might not terminate and could throw to
+ // non-successor (see bug 24185 for details).
+ !isa<ResumeInst>(I) && // has no successors
+ !isa<ReturnInst>(I); // has no successors
+}
+
+bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+ const Loop *L) {
+ // The loop header is guaranteed to be executed for every iteration.
+ //
+ // FIXME: Relax this constraint to cover all basic blocks that are
+ // guaranteed to be executed at every iteration.
+ if (I->getParent() != L->getHeader()) return false;
+
+ for (const Instruction &LI : *L->getHeader()) {
+ if (&LI == I) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
+ }
+ llvm_unreachable("Instruction not contained in its own parent basic block.");
+}
+
+bool llvm::propagatesFullPoison(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // These operations all propagate poison unconditionally. Note that poison
+ // is not any particular value, so xor or subtraction of poison with
+ // itself still yields poison, not zero.
+ return true;
+
+ case Instruction::AShr:
+ case Instruction::SExt:
+ // For these operations, one bit of the input is replicated across
+ // multiple output bits. A replicated poison bit is still poison.
+ return true;
+
+ case Instruction::Shl: {
+ // Left shift *by* a poison value is poison. The number of
+ // positions to shift is unsigned, so no negative values are
+ // possible there. Left shift by zero places preserves poison. So
+ // it only remains to consider left shift of poison by a positive
+ // number of places.
+ //
+ // A left shift by a positive number of places leaves the lowest order bit
+ // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+ // make the poison operand violate that flag, yielding a fresh full-poison
+ // value.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+ }
+
+ case Instruction::Mul: {
+ // A multiplication by zero yields a non-poison zero result, so we need to
+ // rule out zero as an operand. Conservatively, multiplication by a
+ // non-zero constant is not multiplication by zero.
+ //
+ // Multiplication by a non-zero constant can leave some bits
+ // non-poisoned. For example, a multiplication by 2 leaves the lowest
+ // order bit unpoisoned. So we need to consider that.
+ //
+ // Multiplication by 1 preserves poison. If the multiplication has a
+ // no-wrap flag, then we can make the poison operand violate that flag
+ // when multiplied by any integer other than 0 and 1.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+ for (Value *V : OBO->operands()) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ // A ConstantInt cannot yield poison, so we can assume that it is
+ // the other operand that is poison.
+ return !CI->isZero();
+ }
+ }
+ }
+ return false;
+ }
+
+ case Instruction::GetElementPtr:
+ // A GEP implicitly represents a sequence of additions, subtractions,
+ // truncations, sign extensions and multiplications. The multiplications
+ // are by the non-zero sizes of some set of types, so we do not have to be
+ // concerned with multiplication by zero. If the GEP is in-bounds, then
+ // these operations are implicitly no-signed-wrap so poison is propagated
+ // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+ return cast<GEPOperator>(I)->isInBounds();
+
+ default:
+ return false;
+ }
+}
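+
+// For example (illustrative IR), given a full-poison value %p:
+//
+//   %x = add i32 %p, 1     ; propagates poison unconditionally
+//   %y = mul nsw i32 %p, 2 ; propagates: %p can be chosen to violate nsw
+//   %z = mul i32 %p, 2     ; does not qualify: the low bit of %z is not poison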
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+ return cast<StoreInst>(I)->getPointerOperand();
+
+ case Instruction::Load:
+ return cast<LoadInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return I->getOperand(1);
+
+ default:
+ return nullptr;
+ }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+ // We currently only look for uses of poison values within the same basic
+ // block, as that makes it easier to guarantee that the uses will be
+ // executed given that PoisonI is executed.
+ //
+ // FIXME: Expand this to consider uses beyond the same basic block. To do
+ // this, look out for the distinction between post-dominance and strong
+ // post-dominance.
+ const BasicBlock *BB = PoisonI->getParent();
+
+ // Set of instructions that we have proved will yield poison if PoisonI
+ // does.
+ SmallSet<const Value *, 16> YieldsPoison;
+ YieldsPoison.insert(PoisonI);
+
+ for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+ I != E; ++I) {
+ if (&*I != PoisonI) {
+ const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+ if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+ return false;
+ }
+
+ // Mark poison that propagates from I through uses of I.
+ if (YieldsPoison.count(&*I)) {
+ for (const User *User : I->users()) {
+ const Instruction *UserI = cast<Instruction>(User);
+ if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+ YieldsPoison.insert(User);
+ }
+ }
+ }
+ return false;
+}
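+
+// A sketch of the reasoning (illustrative IR): if %p were full poison in
+//
+//   %q = getelementptr inbounds i32, i32* %base, i64 %p
+//   store i32 0, i32* %q
+//
+// then %q would be poison as well (inbounds GEPs propagate it), and the
+// store's pointer operand may not be poison, so executing the block would be
+// undefined; hence %p can be assumed not to be full poison.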
+
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+ if (FMF.noNaNs())
+ return true;
+
+ if (auto *C = dyn_cast<ConstantFP>(V))
+ return !C->isNaN();
+ return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+ if (auto *C = dyn_cast<ConstantFP>(V))
+ return !C->isZero();
+ return false;
+}
+
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+ FastMathFlags FMF,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ LHS = CmpLHS;
+ RHS = CmpRHS;
+
+ // If the predicate is an "or-equal" (FP) predicate, then signed zeroes can
+ // produce inconsistent results between implementations.
+ // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+ // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+ // Therefore we behave conservatively and only proceed if at least one of the
+ // operands is known to not be zero, or if we don't care about signed zeroes.
+ switch (Pred) {
+ default: break;
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+ if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+ !isKnownNonZero(CmpRHS))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ }
+
+ SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+ bool Ordered = false;
+
+ // When given one NaN and one non-NaN input:
+ // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+ // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+ // ordered comparison fails), which could be NaN or non-NaN.
+ // So here we discover exactly what NaN behavior is required/accepted.
+ if (CmpInst::isFPPredicate(Pred)) {
+ bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+ bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+ if (LHSSafe && RHSSafe) {
+ // Both operands are known non-NaN.
+ NaNBehavior = SPNB_RETURNS_ANY;
+ } else if (CmpInst::isOrdered(Pred)) {
+ // An ordered comparison will return false when given a NaN, so it
+ // returns the RHS.
+ Ordered = true;
+ if (LHSSafe)
+ // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+ NaNBehavior = SPNB_RETURNS_NAN;
+ else if (RHSSafe)
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else
+ // Completely unsafe.
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ } else {
+ Ordered = false;
+ // An unordered comparison will return true when given a NaN, so it
+ // returns the LHS.
+ if (LHSSafe)
+ // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else if (RHSSafe)
+ NaNBehavior = SPNB_RETURNS_NAN;
+ else
+ // Completely unsafe.
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ }
+ }
+
+ if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ if (NaNBehavior == SPNB_RETURNS_NAN)
+ NaNBehavior = SPNB_RETURNS_OTHER;
+ else if (NaNBehavior == SPNB_RETURNS_OTHER)
+ NaNBehavior = SPNB_RETURNS_NAN;
+ Ordered = !Ordered;
+ }
+
+ // ([if]cmp X, Y) ? X : Y
+ if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
+ switch (Pred) {
+ default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
+ }
+ }
+
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
+
+ // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
+ // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
+ if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
+ }
+
+ // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
+ // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
+ if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
+ }
+ }
+
+ // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
+ if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
+ if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() &&
+ (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
+ match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {SPF_SMIN, SPNB_NA, false};
+ }
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+ Instruction::CastOps *CastOp) {
+ CastInst *CI = dyn_cast<CastInst>(V1);
+ Constant *C = dyn_cast<Constant>(V2);
+ CastInst *CI2 = dyn_cast<CastInst>(V2);
+ if (!CI)
+ return nullptr;
+ *CastOp = CI->getOpcode();
+
+ if (CI2) {
+ // If V1 and V2 are both the same cast from the same type, we can look
+ // through V1.
+ if (CI2->getOpcode() == CI->getOpcode() &&
+ CI2->getSrcTy() == CI->getSrcTy())
+ return CI2->getOperand(0);
+ return nullptr;
+ } else if (!C) {
+ return nullptr;
+ }
+
+ if (isa<SExtInst>(CI) && CmpI->isSigned()) {
+ Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
+ // This is only valid if the truncated value can be sign-extended
+ // back to the original value.
+ if (ConstantExpr::getSExt(T, C->getType()) == C)
+ return T;
+ return nullptr;
+ }
+ if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
+ return ConstantExpr::getTrunc(C, CI->getSrcTy());
+
+ if (isa<TruncInst>(CI))
+ return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+
+ if (isa<FPToUIInst>(CI))
+ return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+ if (isa<FPToSIInst>(CI))
+ return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+ if (isa<UIToFPInst>(CI))
+ return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+ if (isa<SIToFPInst>(CI))
+ return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+ if (isa<FPTruncInst>(CI))
+ return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+ if (isa<FPExtInst>(CI))
+ return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
+ return nullptr;
+}
+
+SelectPatternResult llvm::matchSelectPattern(Value *V,
+ Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
+
+ CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+ if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
+
+ CmpInst::Predicate Pred = CmpI->getPredicate();
+ Value *CmpLHS = CmpI->getOperand(0);
+ Value *CmpRHS = CmpI->getOperand(1);
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+ FastMathFlags FMF;
+ if (isa<FPMathOperator>(CmpI))
+ FMF = CmpI->getFastMathFlags();
+
+ // Bail out early.
+ if (CmpI->isEquality())
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // Deal with type mismatches.
+ if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
+ if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
+ cast<CastInst>(TrueVal)->getOperand(0), C,
+ LHS, RHS);
+ if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
+ C, cast<CastInst>(FalseVal)->getOperand(0),
+ LHS, RHS);
+ }
+ return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
+ LHS, RHS);
+}
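+
+// Typical use (a sketch; 'SI' is an illustrative select instruction):
+//
+//   Value *LHS, *RHS;
+//   SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS);
+//   if (SPR.Flavor == SPF_SMAX) {
+//     // SI computes smax(LHS, RHS); e.g. "%s = select (icmp sgt %a, %b),
+//     // %a, %b" yields SPF_SMAX with LHS == %a and RHS == %b.
+//   }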
+
+ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
+ const unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1 && "Must have at least one range!");
+ assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+ auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+ auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+ ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+ for (unsigned i = 1; i < NumRanges; ++i) {
+ auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+ // Note: unionWith will potentially create a range that contains values not
+ // contained in any of the original N ranges.
+ CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+ }
+
+ return CR;
+}
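+
+// For example (illustrative metadata): given
+//
+//   !0 = !{i32 0, i32 10, i32 20, i32 30}
+//
+// the ranges [0, 10) and [20, 30) are unioned into the single ConstantRange
+// [0, 30), which, as noted above, also admits the values 10..19.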
+
+/// Return true if "icmp Pred LHS RHS" is always true.
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
+ if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
+ return true;
+
+ switch (Pred) {
+ default:
+ return false;
+
+ case CmpInst::ICMP_SLE: {
+ const APInt *C;
+
+ // LHS s<= LHS +_{nsw} C if C >= 0
+ if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
+ return !C->isNegative();
+ return false;
+ }
+
+ case CmpInst::ICMP_ULE: {
+ const APInt *C;
+
+ // LHS u<= LHS +_{nuw} C for any C
+ if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
+ return true;
+
+ // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
+ auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+ const APInt *&CA, const APInt *&CB) {
+ if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
+ match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
+ return true;
+
+ // If X & C == 0 then (X | C) == X +_{nuw} C
+ if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
+ match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
+ unsigned BitWidth = CA->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+
+ if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *X;
+ const APInt *CLHS, *CRHS;
+ if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
+ return CLHS->ule(*CRHS);
+
+ return false;
+ }
+ }
+}
+
+/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
+/// ALHS ARHS" is true.
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS,
+ Value *ARHS, Value *BLHS, Value *BRHS,
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ switch (Pred) {
+ default:
+ return false;
+
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
+ DT) &&
+ isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI,
+ DT);
+
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
+ DT) &&
+ isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI,
+ DT);
+ }
+}
+
+bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(LHS->getType() == RHS->getType() && "mismatched type");
+ Type *OpTy = LHS->getType();
+ assert(OpTy->getScalarType()->isIntegerTy(1));
+
+ // LHS ==> RHS by definition
+ if (LHS == RHS) return true;
+
+ if (OpTy->isVectorTy())
+ // TODO: extend the code below to handle vectors.
+ return false;
+ assert(OpTy->isIntegerTy(1) && "implied by above");
+
+ ICmpInst::Predicate APred, BPred;
+ Value *ALHS, *ARHS;
+ Value *BLHS, *BRHS;
+
+ if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
+ !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ return false;
+
+ if (APred == BPred)
+ return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
+ CxtI, DT);
+
+ return false;
+}
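+
+// An implication the code above recognizes (illustrative IR):
+//
+//   %w = add nuw i32 %y, 5
+//   %a = icmp ult i32 %x, %y
+//   %b = icmp ult i32 %x, %w
+//
+// %a being true implies %b is true: the predicates match, %x u<= %x holds
+// trivially, and %y u<= %y +_{nuw} 5 by the ICMP_ULE rule above.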
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
new file mode 100644
index 0000000..4b244ec
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -0,0 +1,567 @@
+//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines vectorizer utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+/// \brief Identify if the intrinsic is trivially vectorizable.
+/// This method returns true if the intrinsic's argument types are all
+/// scalars for the scalar form of the intrinsic and all vectors for
+/// the vector form of the intrinsic.
+bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::copysign:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::pow:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::powi:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Identifies if the intrinsic has a scalar operand. It checks for the
+/// ctlz, cttz and powi intrinsics, whose second operand is always scalar.
+bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
+ unsigned ScalarOpdIdx) {
+ switch (ID) {
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::powi:
+ return (ScalarOpdIdx == 1);
+ default:
+ return false;
+ }
+}
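+
+// For example, the vector form of powi still takes a scalar i32 exponent
+// (illustrative declaration):
+//
+//   declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32)
+//
+// so operand index 1 must remain scalar when the call is vectorized.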
+
+/// \brief Check whether the call has a unary float signature.
+/// It checks the following:
+/// a) the call has a single argument,
+/// b) the argument is of floating point type,
+/// c) the call's result type matches the argument type, and
+/// d) the call only reads memory.
+/// If all these conditions are met it returns ValidIntrinsicID,
+/// otherwise it returns not_intrinsic.
+Intrinsic::ID
+llvm::checkUnaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+/// \brief Check whether the call has a binary float signature.
+/// It checks the following:
+/// a) the call has two arguments,
+/// b) both arguments are of floating point type,
+/// c) the call's result type matches both argument types, and
+/// d) the call only reads memory.
+/// If all these conditions are met it returns ValidIntrinsicID,
+/// otherwise it returns not_intrinsic.
+Intrinsic::ID
+llvm::checkBinaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() || !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+/// \brief Returns the intrinsic ID for the call.
+/// For the input call instruction it finds the corresponding intrinsic and
+/// returns its ID; if no mapping is found it returns not_intrinsic.
+Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+ const TargetLibraryInfo *TLI) {
+ // If we have an intrinsic call, check if it is trivially vectorizable.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ Intrinsic::ID ID = II->getIntrinsicID();
+ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
+ ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
+ return ID;
+ return Intrinsic::not_intrinsic;
+ }
+
+ if (!TLI)
+ return Intrinsic::not_intrinsic;
+
+ LibFunc::Func Func;
+ Function *F = CI->getCalledFunction();
+ // We're going to make assumptions about the semantics of the function, so
+ // check that the target knows it is available in this environment and that
+ // it does not have local linkage.
+ if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
+ return Intrinsic::not_intrinsic;
+
+ // Otherwise check if we have a call to a function that can be turned into a
+ // vector intrinsic.
+ switch (Func) {
+ default:
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::sin);
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::cos);
+ case LibFunc::exp:
+ case LibFunc::expf:
+ case LibFunc::expl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp);
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::logl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::log);
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ case LibFunc::log10l:
+ return checkUnaryFloatSignature(*CI, Intrinsic::log10);
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ return checkUnaryFloatSignature(*CI, Intrinsic::log2);
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
+ case LibFunc::fmin:
+ case LibFunc::fminf:
+ case LibFunc::fminl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::minnum);
+ case LibFunc::fmax:
+ case LibFunc::fmaxf:
+ case LibFunc::fmaxl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::maxnum);
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::floor);
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::rint);
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::round);
+ case LibFunc::pow:
+ case LibFunc::powf:
+ case LibFunc::powl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::pow);
+ }
+
+ return Intrinsic::not_intrinsic;
+}
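+
+// Usage sketch ('CI' is an illustrative call): a vectorizer can use this to
+// decide whether a libm call such as
+//
+//   %r = call float @sinf(float %x)
+//
+// may be widened; it maps to Intrinsic::sin above when TLI reports sinf
+// available and the unary-float signature check passes.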
+
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
+ cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 1);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
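+
+// For example (illustrative IR): in
+//
+//   %p = getelementptr {i32}, {i32}* %base, i64 %i, i32 0
+//
+// the trailing zero selects a field with the same allocation size as the
+// result, so it is peeled off and operand 1 (%i) is returned as the operand
+// to check for consecutiveness.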
+
+/// \brief If the argument is a GEP, then returns the operand identified by
+/// getGEPInductionOperand. However, if there is some other non-loop-invariant
+/// operand, it returns that instead.
+Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return Ptr;
+
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+ return Ptr;
+ return GEP->getOperand(InductionOperand);
+}
+
+/// \brief If a value has only one user that is a CastInst, return it.
+Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+ Value *UniqueCast = nullptr;
+ for (User *U : Ptr->users()) {
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (CI && CI->getType() == Ty) {
+ if (!UniqueCast)
+ UniqueCast = CI;
+ else
+ return nullptr;
+ }
+ }
+ return UniqueCast;
+}
+
+/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
+/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
+Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+ auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy || PtrTy->isAggregateType())
+ return nullptr;
+
+ // Try to strip off a GEP instruction to make the pointer (actually, the
+ // index at this point) easier to analyze. If OrigPtr is equal to Ptr we are
+ // analyzing the pointer; otherwise we are analyzing the index.
+ Value *OrigPtr = Ptr;
+
+ // The size of the pointer access.
+ int64_t PtrAccessSize = 1;
+
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
+ const SCEV *V = SE->getSCEV(Ptr);
+
+ if (Ptr != OrigPtr)
+ // Strip off casts.
+ while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+ V = C->getOperand();
+
+ const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+ if (!S)
+ return nullptr;
+
+ V = S->getStepRecurrence(*SE);
+ if (!V)
+ return nullptr;
+
+ // Strip off the size of access multiplication if we are still analyzing the
+ // pointer.
+ if (OrigPtr == Ptr) {
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+ // Record the element size; the step of a pointer recurrence includes it,
+ // and the multiplication by it is stripped below.
+ PtrAccessSize = DL.getTypeAllocSize(PtrTy->getElementType());
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+ if (M->getOperand(0)->getSCEVType() != scConstant)
+ return nullptr;
+
+ const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return nullptr;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+ if (PtrAccessSize != StepVal)
+ return nullptr;
+ V = M->getOperand(1);
+ }
+ }
+
+ // Strip off casts.
+ Type *StrippedOffRecurrenceCast = nullptr;
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+ StrippedOffRecurrenceCast = C->getType();
+ V = C->getOperand();
+ }
+
+ // Look for the loop invariant symbolic value.
+ const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+ if (!U)
+ return nullptr;
+
+ Value *Stride = U->getValue();
+ if (!Lp->isLoopInvariant(Stride))
+ return nullptr;
+
+ // If we have stripped off the recurrence cast, we have to make sure that we
+ // return the value that is used in this loop so that we can replace it later.
+ if (StrippedOffRecurrenceCast)
+ Stride = getUniqueCastUse(Stride, Lp, StrippedOffRecurrenceCast);
+
+ return Stride;
+}
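+
+// A sketch of the overall flow (illustrative): for a C-style access
+// a[i * stride] into an i32 array, where 'stride' is loop-invariant, the
+// pointer's SCEV step is (4 * %stride); the constant size factor is stripped
+// above and %stride is returned, so a caller can version the loop on
+// "stride == 1".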
+
+/// \brief Given a vector and an element number, see if the scalar value is
+/// already around as a register, for example if it were inserted then extracted
+/// from the vector.
+Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ VectorType *VTy = cast<VectorType>(V->getType());
+ unsigned Width = VTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(VTy->getElementType());
+
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->getAggregateElement(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return nullptr;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return findScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ int InEl = SVI->getMaskValue(EltNo);
+ if (InEl < 0)
+ return UndefValue::get(VTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return findScalarElement(SVI->getOperand(0), InEl);
+ return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ }
+
+ // Extract a value from a vector add operation with a constant zero.
+ Value *Val = nullptr; Constant *Con = nullptr;
+ if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
+ if (Constant *Elt = Con->getAggregateElement(EltNo))
+ if (Elt->isNullValue())
+ return findScalarElement(Val, EltNo);
+
+ // Otherwise, we don't know.
+ return nullptr;
+}
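+
+// For example (illustrative IR): given
+//
+//   %v1 = insertelement <4 x float> %v0, float %s, i32 2
+//   %e  = extractelement <4 x float> %v1, i32 2
+//
+// findScalarElement(%v1, 2) returns %s, so %e can be replaced by %s without
+// touching the vector.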
+
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// This function is not fully general. It checks only 2 cases:
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcast a single value into a vector.
+///
+const llvm::Value *llvm::getSplatValue(const Value *V) {
+
+ if (auto *C = dyn_cast<Constant>(V))
+ if (isa<VectorType>(V->getType()))
+ return C->getSplatValue();
+
+ auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
+ if (!ShuffleInst)
+ return nullptr;
+ // All shuffle mask elements must be zero (or undef, i.e. -1).
+ for (int MaskElt : ShuffleInst->getShuffleMask())
+ if (MaskElt != 0 && MaskElt != -1)
+ return nullptr;
+ // The first shuffle source is 'insertelement' with index 0.
+ auto *InsertEltInst =
+ dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
+ if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
+ !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+ return nullptr;
+
+ return InsertEltInst->getOperand(1);
+}
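+
+// The instruction-sequence case above matches the canonical broadcast idiom
+// (illustrative IR):
+//
+//   %ins   = insertelement <4 x i32> undef, i32 %x, i32 0
+//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
+//                          <4 x i32> zeroinitializer
+//
+// for which getSplatValue returns %x.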
+
+MapVector<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+ const TargetTransformInfo *TTI) {
+
+ // DemandedBits will give us every value's live-out bits. But we want
+ // to ensure no extra casts would need to be inserted, so every DAG
+ // of connected values must have the same minimum bitwidth.
+ EquivalenceClasses<Value *> ECs;
+ SmallVector<Value *, 16> Worklist;
+ SmallPtrSet<Value *, 4> Roots;
+ SmallPtrSet<Value *, 16> Visited;
+ DenseMap<Value *, uint64_t> DBits;
+ SmallPtrSet<Instruction *, 4> InstructionSet;
+ MapVector<Instruction *, uint64_t> MinBWs;
+
+ // Determine the roots. We work bottom-up, from truncs or icmps.
+ bool SeenExtFromIllegalType = false;
+ for (auto *BB : Blocks)
+ for (auto &I : *BB) {
+ InstructionSet.insert(&I);
+
+ if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
+ !TTI->isTypeLegal(I.getOperand(0)->getType()))
+ SeenExtFromIllegalType = true;
+
+ // Only deal with non-vector integers up to 64-bits wide.
+ if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
+ !I.getType()->isVectorTy() &&
+ I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
+ // Don't make work for ourselves. If we know the truncated type is legal,
+ // don't add it to the worklist.
+ if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
+ continue;
+
+ Worklist.push_back(&I);
+ Roots.insert(&I);
+ }
+ }
+ // Early exit.
+ if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
+ return MinBWs;
+
+ // Now proceed breadth-first, unioning values together.
+ while (!Worklist.empty()) {
+ Value *Val = Worklist.pop_back_val();
+ Value *Leader = ECs.getOrInsertLeaderValue(Val);
+
+ if (Visited.count(Val))
+ continue;
+ Visited.insert(Val);
+
+ // Non-instructions terminate a chain successfully.
+ if (!isa<Instruction>(Val))
+ continue;
+ Instruction *I = cast<Instruction>(Val);
+
+ // If we encounter a type that is larger than 64 bits, we can't represent
+ // it so bail out.
+ if (DB.getDemandedBits(I).getBitWidth() > 64)
+ return MapVector<Instruction *, uint64_t>();
+
+ uint64_t V = DB.getDemandedBits(I).getZExtValue();
+ DBits[Leader] |= V;
+
+ // Casts, loads and instructions outside of our range terminate a chain
+ // successfully.
+ if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
+ !InstructionSet.count(I))
+ continue;
+
+ // Unsafe casts terminate a chain unsuccessfully. We can't do anything
+ // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
+ // transform anything that relies on them.
+ if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
+ !I->getType()->isIntegerTy()) {
+ DBits[Leader] |= ~0ULL;
+ continue;
+ }
+
+ // We don't modify the types of PHIs. Reductions will already have been
+ // truncated if possible, and inductions' sizes will have been chosen by
+ // indvars.
+ if (isa<PHINode>(I))
+ continue;
+
+ if (DBits[Leader] == ~0ULL)
+ // All bits demanded, no point continuing.
+ continue;
+
+ for (Value *O : cast<User>(I)->operands()) {
+ ECs.unionSets(Leader, O);
+ Worklist.push_back(O);
+ }
+ }
+
+ // Now we've discovered all values, walk them to see if there are
+ // any users we didn't see. If there are, we can't optimize that
+ // chain.
+ for (auto &I : DBits)
+ for (auto *U : I.first->users())
+ if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
+ DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
+
+ for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
+ uint64_t LeaderDemandedBits = 0;
+ for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
+ LeaderDemandedBits |= DBits[*MI];
+
+ uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
+ llvm::countLeadingZeros(LeaderDemandedBits);
+ // Round up to a power of 2
+ if (!isPowerOf2_64((uint64_t)MinBW))
+ MinBW = NextPowerOf2(MinBW);
+ for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
+ if (!isa<Instruction>(*MI))
+ continue;
+ Type *Ty = (*MI)->getType();
+ if (Roots.count(*MI))
+ Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
+ if (MinBW < Ty->getScalarSizeInBits())
+ MinBWs[cast<Instruction>(*MI)] = MinBW;
+ }
+ }
+
+ return MinBWs;
+}
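+
+// The result's meaning, by example (a sketch): if a chain
+//
+//   %a = add i32 %x, %y
+//   %t = trunc i32 %a to i8
+//
+// only ever demands 8 bits of %a, MinBWs maps the add (and the rest of its
+// equivalence class) to 8, telling a vectorizer it may compute the chain on
+// i8 elements instead of i32.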