summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp18
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp38
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/AssumptionCache.cpp46
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp121
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp65
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp275
-rw-r--r--contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp19
-rw-r--r--contrib/llvm/lib/Analysis/CFLGraph.h12
-rw-r--r--contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp32
-rw-r--r--contrib/llvm/lib/Analysis/CGSCCPassManager.cpp268
-rw-r--r--contrib/llvm/lib/Analysis/CallGraph.cpp46
-rw-r--r--contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp33
-rw-r--r--contrib/llvm/lib/Analysis/CallPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/CodeMetrics.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp330
-rw-r--r--contrib/llvm/lib/Analysis/CostModel.cpp18
-rw-r--r--contrib/llvm/lib/Analysis/DemandedBits.cpp57
-rw-r--r--contrib/llvm/lib/Analysis/DependenceAnalysis.cpp45
-rw-r--r--contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp16
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp13
-rw-r--r--contrib/llvm/lib/Analysis/EHPersonalities.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/GlobalsModRef.cpp16
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp78
-rw-r--r--contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp467
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp11
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp1994
-rw-r--r--contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp25
-rw-r--r--contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp8
-rw-r--r--contrib/llvm/lib/Analysis/LazyCallGraph.cpp679
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp411
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp20
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp46
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp129
-rw-r--r--contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp23
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp32
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp25
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/MemDerefPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp198
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp13
-rw-r--r--contrib/llvm/lib/Analysis/MemoryLocation.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSA.cpp2074
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp488
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp201
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp199
-rw-r--r--contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp9
-rw-r--r--contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp117
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp45
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp18
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp2340
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp80
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp310
-rw-r--r--contrib/llvm/lib/Analysis/SparsePropagation.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp1284
-rw-r--r--contrib/llvm/lib/Analysis/TargetTransformInfo.cpp134
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp22
-rw-r--r--contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp11
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp1982
-rw-r--r--contrib/llvm/lib/Analysis/VectorUtils.cpp94
71 files changed, 10083 insertions, 5003 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 84da76b..4c29aea 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -332,8 +332,8 @@ FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) {
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
- // Be conservative in the face of volatile/atomic.
- if (!L->isUnordered())
+ // Be conservative in the face of atomic.
+ if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
return MRI_ModRef;
// If the load address doesn't alias the given address, it doesn't read
@@ -347,8 +347,8 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
- // Be conservative in the face of volatile/atomic.
- if (!S->isUnordered())
+ // Be conservative in the face of atomic.
+ if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
return MRI_ModRef;
if (Loc.Ptr) {
@@ -367,6 +367,14 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
return MRI_Mod;
}
+ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
+ // If we know that the location is a constant memory location, the fence
+ // cannot modify this location.
+ if (Loc.Ptr && pointsToConstantMemory(Loc))
+ return MRI_Ref;
+ return MRI_ModRef;
+}
+
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
@@ -689,7 +697,7 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
bool llvm::isNoAliasCall(const Value *V) {
if (auto CS = ImmutableCallSite(V))
- return CS.paramHasAttr(0, Attribute::NoAlias);
+ return CS.hasRetAttr(Attribute::NoAlias);
return false;
}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 4d6a6c9..435c782 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -14,9 +14,9 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 701b0e1..4dfa254 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -17,8 +17,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -199,9 +199,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
- if (AA.getModRefInfo(UnknownInsts[i],
- MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef)
- return true;
+ if (auto *Inst = getUnknownInst(i))
+ if (AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)) !=
+ MRI_NoModRef)
+ return true;
}
return false;
@@ -217,10 +218,12 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
- if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
- AA.getModRefInfo(C2, C1) != MRI_NoModRef)
- return true;
+ if (auto *UnknownInst = getUnknownInst(i)) {
+ ImmutableCallSite C1(UnknownInst), C2(Inst);
+ if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
+ AA.getModRefInfo(C2, C1) != MRI_NoModRef)
+ return true;
+ }
}
for (iterator I = begin(), E = end(); I != E; ++I)
@@ -471,7 +474,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
// If there are any call sites in the alias set, add them to this AST.
for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
- add(AS.UnknownInsts[i]);
+ if (auto *Inst = AS.getUnknownInst(i))
+ add(Inst);
// Loop over all of the pointers in this alias set.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
@@ -489,19 +493,6 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
// dangling pointers to deleted instructions.
//
void AliasSetTracker::deleteValue(Value *PtrVal) {
- // If this is a call instruction, remove the callsite from the appropriate
- // AliasSet (if present).
- if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
- if (Inst->mayReadOrWriteMemory()) {
- // Scan all the alias sets to see if this call site is contained.
- for (iterator I = begin(), E = end(); I != E;) {
- iterator Cur = I++;
- if (!Cur->Forward)
- Cur->removeUnknownInst(*this, Inst);
- }
- }
- }
-
// First, look up the PointerRec for this pointer.
PointerMapType::iterator I = PointerMap.find_as(PtrVal);
if (I == PointerMap.end()) return; // Noop
@@ -633,7 +624,8 @@ void AliasSet::print(raw_ostream &OS) const {
OS << "\n " << UnknownInsts.size() << " Unknown instructions: ";
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (i) OS << ", ";
- UnknownInsts[i]->printAsOperand(OS);
+ if (auto *I = getUnknownInst(i))
+ I->printAsOperand(OS);
}
}
OS << "\n";
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 0e7cf40..0e0b5c9 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -57,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
+ initializeLazyValueInfoPrinterPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
@@ -78,6 +79,8 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
initializeLCSSAVerificationPassPass(Registry);
+ initializeMemorySSAWrapperPassPass(Registry);
+ initializeMemorySSAPrinterLegacyPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
index 5851594..3ff2789 100644
--- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -24,15 +24,21 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) {
+static cl::opt<bool>
+ VerifyAssumptionCache("verify-assumption-cache", cl::Hidden,
+ cl::desc("Enable verification of assumption cache"),
+ cl::init(false));
+
+SmallVector<WeakTrackingVH, 1> &
+AssumptionCache::getOrInsertAffectedValues(Value *V) {
// Try using find_as first to avoid creating extra value handles just for the
// purpose of doing the lookup.
auto AVI = AffectedValues.find_as(V);
if (AVI != AffectedValues.end())
return AVI->second;
- auto AVIP = AffectedValues.insert({
- AffectedValueCallbackVH(V, this), SmallVector<WeakVH, 1>()});
+ auto AVIP = AffectedValues.insert(
+ {AffectedValueCallbackVH(V, this), SmallVector<WeakTrackingVH, 1>()});
return AVIP.first->second;
}
@@ -47,9 +53,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
} else if (auto *I = dyn_cast<Instruction>(V)) {
Affected.push_back(I);
- if (I->getOpcode() == Instruction::BitCast ||
- I->getOpcode() == Instruction::PtrToInt) {
- auto *Op = I->getOperand(0);
+ // Peek through unary operators to find the source of the condition.
+ Value *Op;
+ if (match(I, m_BitCast(m_Value(Op))) ||
+ match(I, m_PtrToInt(m_Value(Op))) ||
+ match(I, m_Not(m_Value(Op)))) {
if (isa<Instruction>(Op) || isa<Argument>(Op))
Affected.push_back(Op);
}
@@ -76,18 +84,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
Value *B;
ConstantInt *C;
// (A & B) or (A | B) or (A ^ B).
- if (match(V,
- m_CombineOr(m_And(m_Value(A), m_Value(B)),
- m_CombineOr(m_Or(m_Value(A), m_Value(B)),
- m_Xor(m_Value(A), m_Value(B)))))) {
+ if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) {
AddAffected(A);
AddAffected(B);
// (A << C) or (A >>_s C) or (A >>_u C) where C is some constant.
- } else if (match(V,
- m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)),
- m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)),
- m_AShr(m_Value(A),
- m_ConstantInt(C)))))) {
+ } else if (match(V, m_Shift(m_Value(A), m_ConstantInt(C)))) {
AddAffected(A);
}
};
@@ -229,7 +230,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
}
void AssumptionCacheTracker::verifyAnalysis() const {
-#ifndef NDEBUG
+ // FIXME: In the long term the verifier should not be controllable with a
+ // flag. We should either fix all passes to correctly update the assumption
+ // cache and enable the verifier unconditionally or somehow arrange for the
+ // assumption list to be updated automatically by passes.
+ if (!VerifyAssumptionCache)
+ return;
+
SmallPtrSet<const CallInst *, 4> AssumptionSet;
for (const auto &I : AssumptionCaches) {
for (auto &VH : I.second->assumptions())
@@ -238,11 +245,10 @@ void AssumptionCacheTracker::verifyAnalysis() const {
for (const BasicBlock &B : cast<Function>(*I.first))
for (const Instruction &II : B)
- if (match(&II, m_Intrinsic<Intrinsic::assume>()))
- assert(AssumptionSet.count(cast<CallInst>(&II)) &&
- "Assumption in scanned function not in cache");
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()) &&
+ !AssumptionSet.count(cast<CallInst>(&II)))
+ report_fatal_error("Assumption in scanned function not in cache");
}
-#endif
}
AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c8d0579..e682a64 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,13 +17,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -36,6 +36,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include <algorithm>
#define DEBUG_TYPE "basicaa"
@@ -127,7 +128,9 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
+ ObjectSizeOpts Opts;
+ Opts.RoundToAlign = RoundToAlign;
+ if (getObjectSize(V, Size, DL, &TLI, Opts))
return Size;
return MemoryLocation::UnknownSize;
}
@@ -635,7 +638,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
/// Returns true if this is a writeonly (i.e Mod only) parameter.
static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
const TargetLibraryInfo &TLI) {
- if (CS.paramHasAttr(ArgIdx + 1, Attribute::WriteOnly))
+ if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly))
return true;
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -644,9 +647,9 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
// whenever possible.
// FIXME Consider handling this in InferFunctionAttr.cpp together with other
// attributes.
- LibFunc::Func F;
+ LibFunc F;
if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc::memset_pattern16 && TLI.has(F))
+ F == LibFunc_memset_pattern16 && TLI.has(F))
if (ArgIdx == 0)
return true;
@@ -664,10 +667,10 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
if (isWriteOnlyParam(CS, ArgIdx, TLI))
return MRI_Mod;
- if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly))
return MRI_Ref;
- if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone))
return MRI_NoModRef;
return AAResultBase::getArgModRefInfo(CS, ArgIdx);
@@ -680,8 +683,11 @@ static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) {
#ifndef NDEBUG
static const Function *getParent(const Value *V) {
- if (const Instruction *inst = dyn_cast<Instruction>(V))
+ if (const Instruction *inst = dyn_cast<Instruction>(V)) {
+ if (!inst->getParent())
+ return nullptr;
return inst->getParent()->getParent();
+ }
if (const Argument *arg = dyn_cast<Argument>(V))
return arg->getParent();
@@ -749,7 +755,11 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
isNonEscapingLocalObject(Object)) {
- bool PassedAsArg = false;
+
+ // Optimistically assume that call doesn't touch Object and check this
+ // assumption in the following loop.
+ ModRefInfo Result = MRI_NoModRef;
+
unsigned OperandNo = 0;
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
CI != CE; ++CI, ++OperandNo) {
@@ -761,20 +771,38 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
continue;
+ // Call doesn't access memory through this operand, so we don't care
+ // if it aliases with Object.
+ if (CS.doesNotAccessMemory(OperandNo))
+ continue;
+
// If this is a no-capture pointer argument, see if we can tell that it
- // is impossible to alias the pointer we're checking. If not, we have to
- // assume that the call could touch the pointer, even though it doesn't
- // escape.
+ // is impossible to alias the pointer we're checking.
AliasResult AR =
getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
- if (AR) {
- PassedAsArg = true;
- break;
+
+      // Operand doesn't alias 'Object'; continue looking for other aliases.
+ if (AR == NoAlias)
+ continue;
+      // Operand aliases 'Object', but the call doesn't modify it. Strengthen
+      // the initial assumption and keep looking in case there are more aliases.
+ if (CS.onlyReadsMemory(OperandNo)) {
+ Result = static_cast<ModRefInfo>(Result | MRI_Ref);
+ continue;
}
+ // Operand aliases 'Object' but call only writes into it.
+ if (CS.doesNotReadMemory(OperandNo)) {
+ Result = static_cast<ModRefInfo>(Result | MRI_Mod);
+ continue;
+ }
+ // This operand aliases 'Object' and call reads and writes into it.
+ Result = MRI_ModRef;
+ break;
}
- if (!PassedAsArg)
- return MRI_NoModRef;
+ // Early return if we improved mod ref information
+ if (Result != MRI_ModRef)
+ return Result;
}
// If the CallSite is to malloc or calloc, we can assume that it doesn't
@@ -784,7 +812,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// well. Or alternatively, replace all of this with inaccessiblememonly once
// that's implemented fully.
auto *Inst = CS.getInstruction();
- if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI)) {
+ if (isMallocOrCallocLikeFn(Inst, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
@@ -900,10 +928,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
uint64_t V2Size,
const DataLayout &DL) {
- assert(GEP1->getPointerOperand()->stripPointerCasts() ==
- GEP2->getPointerOperand()->stripPointerCasts() &&
- GEP1->getPointerOperand()->getType() ==
- GEP2->getPointerOperand()->getType() &&
+ assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
+ GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
+ GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
"Expected GEPs with the same pointer operand");
// Try to determine whether GEP1 and GEP2 index through arrays, into structs,
@@ -979,15 +1006,32 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// Because they cannot partially overlap and because fields in an array
// cannot overlap, if we can prove the final indices are different between
// GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
-
+
// If the last indices are constants, we've already checked they don't
// equal each other so we can exit early.
if (C1 && C2)
return NoAlias;
- if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
- GEP2->getOperand(GEP2->getNumOperands() - 1),
- DL))
- return NoAlias;
+ {
+ Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1);
+ Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1);
+ if (isa<PHINode>(GEP1LastIdx) || isa<PHINode>(GEP2LastIdx)) {
+ // If one of the indices is a PHI node, be safe and only use
+ // computeKnownBits so we don't make any assumptions about the
+ // relationships between the two indices. This is important if we're
+ // asking about values from different loop iterations. See PR32314.
+ // TODO: We may be able to change the check so we only do this when
+ // we definitely looked through a PHINode.
+ if (GEP1LastIdx != GEP2LastIdx &&
+ GEP1LastIdx->getType() == GEP2LastIdx->getType()) {
+ KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL);
+ KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL);
+ if (Known1.Zero.intersects(Known2.One) ||
+ Known1.One.intersects(Known2.Zero))
+ return NoAlias;
+ }
+ } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
+ return NoAlias;
+ }
return MayAlias;
} else if (!LastIndexedStruct || !C1 || !C2) {
return MayAlias;
@@ -1161,10 +1205,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// If we know the two GEPs are based off of the exact same pointer (and not
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
- if (GEP1->getPointerOperand()->stripPointerCasts() ==
- GEP2->getPointerOperand()->stripPointerCasts() &&
- GEP1->getPointerOperand()->getType() ==
- GEP2->getPointerOperand()->getType()) {
+ if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
+ GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
+ GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
if (R != MayAlias)
@@ -1261,9 +1304,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// give up if we can't determine conditions that hold for every cycle:
const Value *V = DecompGEP1.VarIndices[i].V;
- bool SignKnownZero, SignKnownOne;
- ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
- 0, &AC, nullptr, DT);
+ KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT);
+ bool SignKnownZero = Known.isNonNegative();
+ bool SignKnownOne = Known.isNegative();
// Zero-extension widens the variable, and so forces the sign
// bit to zero.
@@ -1305,11 +1348,7 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// Statically, we can see that the base objects are the same, but the
// pointers have dynamic offsets which we can't resolve. And none of our
// little tricks above worked.
- //
- // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
- // practical effect of this is protecting TBAA in the case of dynamic
- // indices into arrays of unions or malloc'd memory.
- return PartialAlias;
+ return MayAlias;
}
static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
@@ -1478,8 +1517,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias;
// Strip off any casts if they exist.
- V1 = V1->stripPointerCasts();
- V2 = V2->stripPointerCasts();
+ V1 = V1->stripPointerCastsAndBarriers();
+ V2 = V2->stripPointerCastsAndBarriers();
// If V1 or V2 is undef, the result is NoAlias because we can always pick a
// value for undef that aliases nothing in the program.
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 4cdbe4d..07a2a92 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
#define DEBUG_TYPE "block-freq"
-#ifndef NDEBUG
static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG(
"view-block-freq-propagation-dags", cl::Hidden,
cl::desc("Pop up a window to show a dag displaying how block "
@@ -55,8 +54,29 @@ cl::opt<unsigned>
"is no less than the max frequency of the "
"function multiplied by this percent."));
+// Command line option to turn on CFG dot dump after profile annotation.
+cl::opt<bool>
+ PGOViewCounts("pgo-view-counts", cl::init(false), cl::Hidden,
+ cl::desc("A boolean option to show CFG dag with "
+ "block profile counts and branch probabilities "
+ "right after PGO profile annotation step. The "
+ "profile counts are computed using branch "
+ "probabilities from the runtime profile data and "
+ "block frequency propagation algorithm. To view "
+ "the raw counts from the profile, use option "
+ "-pgo-view-raw-counts instead. To limit graph "
+ "display to only one function, use filtering option "
+ "-view-bfi-func-name."));
+
namespace llvm {
+static GVDAGType getGVDT() {
+
+ if (PGOViewCounts)
+ return GVDT_Count;
+ return ViewBlockFreqPropagationDAG;
+}
+
template <>
struct GraphTraits<BlockFrequencyInfo *> {
typedef const BasicBlock *NodeRef;
@@ -89,8 +109,7 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase {
std::string getNodeLabel(const BasicBlock *Node,
const BlockFrequencyInfo *Graph) {
- return BFIDOTGTraitsBase::getNodeLabel(Node, Graph,
- ViewBlockFreqPropagationDAG);
+ return BFIDOTGTraitsBase::getNodeLabel(Node, Graph, getGVDT());
}
std::string getNodeAttributes(const BasicBlock *Node,
@@ -107,7 +126,6 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase {
};
} // end namespace llvm
-#endif
BlockFrequencyInfo::BlockFrequencyInfo() {}
@@ -132,19 +150,26 @@ BlockFrequencyInfo &BlockFrequencyInfo::operator=(BlockFrequencyInfo &&RHS) {
// template instantiated which is not available in the header.
BlockFrequencyInfo::~BlockFrequencyInfo() {}
+bool BlockFrequencyInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<BlockFrequencyAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
void BlockFrequencyInfo::calculate(const Function &F,
const BranchProbabilityInfo &BPI,
const LoopInfo &LI) {
if (!BFI)
BFI.reset(new ImplType);
BFI->calculate(F, BPI, LI);
-#ifndef NDEBUG
if (ViewBlockFreqPropagationDAG != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
F.getName().equals(ViewBlockFreqFuncName))) {
view();
}
-#endif
}
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
@@ -171,16 +196,32 @@ void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
BFI->setBlockFreq(BB, Freq);
}
+void BlockFrequencyInfo::setBlockFreqAndScale(
+ const BasicBlock *ReferenceBB, uint64_t Freq,
+ SmallPtrSetImpl<BasicBlock *> &BlocksToScale) {
+ assert(BFI && "Expected analysis to be available");
+ // Use 128 bits APInt to avoid overflow.
+ APInt NewFreq(128, Freq);
+ APInt OldFreq(128, BFI->getBlockFreq(ReferenceBB).getFrequency());
+ APInt BBFreq(128, 0);
+ for (auto *BB : BlocksToScale) {
+ BBFreq = BFI->getBlockFreq(BB).getFrequency();
+ // Multiply first by NewFreq and then divide by OldFreq
+ // to minimize loss of precision.
+ BBFreq *= NewFreq;
+ // udiv is an expensive operation in the general case. If this ends up being
+ // a hot spot, one of the options proposed in
+ // https://reviews.llvm.org/D28535#650071 could be used to avoid this.
+ BBFreq = BBFreq.udiv(OldFreq);
+ BFI->setBlockFreq(BB, BBFreq.getLimitedValue());
+ }
+ BFI->setBlockFreq(ReferenceBB, Freq);
+}
+
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
void BlockFrequencyInfo::view() const {
-// This code is only for debugging.
-#ifndef NDEBUG
ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
-#else
- errs() << "BlockFrequencyInfo::view is only available in debug builds on "
- "systems with Graphviz or gv!\n";
-#endif // NDEBUG
}
const Function *BlockFrequencyInfo::getFunction() const {
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 9850e02..e5d8c33 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -28,7 +28,9 @@ ScaledNumber<uint64_t> BlockMass::toScaled() const {
return ScaledNumber<uint64_t>(getMass() + 1, -64);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
+#endif
static char getHexDigit(int N) {
assert(N < 16);
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 3eabb78..a329e5a 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -30,6 +31,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -58,19 +60,12 @@ char BranchProbabilityInfoWrapperPass::ID = 0;
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
-/// \brief Unreachable-terminating branch taken weight.
+/// \brief Unreachable-terminating branch taken probability.
///
-/// This is the weight for a branch being taken to a block that terminates
+/// This is the probability for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
-static const uint32_t UR_TAKEN_WEIGHT = 1;
-
-/// \brief Unreachable-terminating branch not-taken weight.
-///
-/// This is the weight for a branch not being taken toward a block that
-/// terminates (eventually) in unreachable. Such a branch is essentially never
-/// taken. Set the weight to an absurdly high value so that nested loops don't
-/// easily subsume it.
-static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
+/// All reachable probability will equally share the remaining part.
+static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
/// \brief Weight for a branch taken going into a cold block.
///
@@ -108,11 +103,9 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-/// \brief Calculate edge weights for successors lead to unreachable.
-///
-/// Predict that a successor which leads necessarily to an
-/// unreachable-terminated block as extremely unlikely.
-bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
+/// \brief Add \p BB to PostDominatedByUnreachable set if applicable.
+void
+BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
if (isa<UnreachableInst>(TI) ||
@@ -122,39 +115,85 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
// never execute.
BB->getTerminatingDeoptimizeCall())
PostDominatedByUnreachable.insert(BB);
- return false;
+ return;
+ }
+
+ // If the terminator is an InvokeInst, check only the normal destination block
+ // as the unwind edge of InvokeInst is also very unlikely taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ if (PostDominatedByUnreachable.count(II->getNormalDest()))
+ PostDominatedByUnreachable.insert(BB);
+ return;
+ }
+
+ for (auto *I : successors(BB))
+    // If any successor is not post-dominated, then BB is not either.
+ if (!PostDominatedByUnreachable.count(I))
+ return;
+
+ PostDominatedByUnreachable.insert(BB);
+}
+
+/// \brief Add \p BB to PostDominatedByColdCall set if applicable.
+void
+BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
+ assert(!PostDominatedByColdCall.count(BB));
+ const TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0)
+ return;
+
+  // If all successors are post-dominated, then so is BB.
+ if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
+ return PostDominatedByColdCall.count(SuccBB);
+ })) {
+ PostDominatedByColdCall.insert(BB);
+ return;
}
+ // If the terminator is an InvokeInst, check only the normal destination
+ // block as the unwind edge of InvokeInst is also very unlikely taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ if (PostDominatedByColdCall.count(II->getNormalDest())) {
+ PostDominatedByColdCall.insert(BB);
+ return;
+ }
+
+ // Otherwise, if the block itself contains a cold function, add it to the
+ // set of blocks post-dominated by a cold call.
+ for (auto &I : *BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::Cold)) {
+ PostDominatedByColdCall.insert(BB);
+ return;
+ }
+}
+
+/// \brief Calculate edge weights for successors lead to unreachable.
+///
+/// Predict that a successor which leads necessarily to an
+/// unreachable-terminated block as extremely unlikely.
+bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
+ const TerminatorInst *TI = BB->getTerminator();
+ assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
+
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ if (isa<InvokeInst>(TI))
+ return false;
+
SmallVector<unsigned, 4> UnreachableEdges;
SmallVector<unsigned, 4> ReachableEdges;
- for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
if (PostDominatedByUnreachable.count(*I))
UnreachableEdges.push_back(I.getSuccessorIndex());
else
ReachableEdges.push_back(I.getSuccessorIndex());
- }
- // If all successors are in the set of blocks post-dominated by unreachable,
- // this block is too.
- if (UnreachableEdges.size() == TI->getNumSuccessors())
- PostDominatedByUnreachable.insert(BB);
-
- // Skip probabilities if this block has a single successor or if all were
- // reachable.
- if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
+ // Skip probabilities if all were reachable.
+ if (UnreachableEdges.empty())
return false;
- // If the terminator is an InvokeInst, check only the normal destination block
- // as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(TI))
- if (PostDominatedByUnreachable.count(II->getNormalDest())) {
- PostDominatedByUnreachable.insert(BB);
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- return false;
- }
-
if (ReachableEdges.empty()) {
BranchProbability Prob(1, UnreachableEdges.size());
for (unsigned SuccIdx : UnreachableEdges)
@@ -162,12 +201,10 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
- auto UnreachableProb = BranchProbability::getBranchProbability(
- UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- uint64_t(UnreachableEdges.size()));
- auto ReachableProb = BranchProbability::getBranchProbability(
- UR_NONTAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size()));
+ auto UnreachableProb = UR_TAKEN_PROB;
+ auto ReachableProb =
+ (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) /
+ ReachableEdges.size();
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
@@ -178,11 +215,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
}
// Propagate existing explicit probabilities from either profile data or
-// 'expect' intrinsic processing.
+// 'expect' intrinsic processing. Examine metadata against unreachable
+// heuristic. The probability of the edge coming to unreachable block is
+// set to min of metadata and unreachable heuristic.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 1)
- return false;
+ assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
return false;
@@ -203,6 +241,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
// be scaled to fit in 32 bits.
uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
+ SmallVector<unsigned, 2> UnreachableIdxs;
+ SmallVector<unsigned, 2> ReachableIdxs;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
ConstantInt *Weight =
@@ -213,6 +253,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
"Too many bits for uint32_t");
Weights.push_back(Weight->getZExtValue());
WeightSum += Weights.back();
+ if (PostDominatedByUnreachable.count(TI->getSuccessor(i - 1)))
+ UnreachableIdxs.push_back(i - 1);
+ else
+ ReachableIdxs.push_back(i - 1);
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
@@ -221,22 +265,49 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
uint64_t ScalingFactor =
(WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
- WeightSum = 0;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- Weights[i] /= ScalingFactor;
- WeightSum += Weights[i];
+ if (ScalingFactor > 1) {
+ WeightSum = 0;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ Weights[i] /= ScalingFactor;
+ WeightSum += Weights[i];
+ }
}
+ assert(WeightSum <= UINT32_MAX &&
+ "Expected weights to scale down to 32 bits");
- if (WeightSum == 0) {
+ if (WeightSum == 0 || ReachableIdxs.size() == 0) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeProbability(BB, i, {1, e});
- } else {
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)});
+ Weights[i] = 1;
+ WeightSum = TI->getNumSuccessors();
}
- assert(WeightSum <= UINT32_MAX &&
- "Expected weights to scale down to 32 bits");
+ // Set the probability.
+ SmallVector<BranchProbability, 2> BP;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ BP.push_back({ Weights[i], static_cast<uint32_t>(WeightSum) });
+
+ // Examine the metadata against unreachable heuristic.
+  // If the unreachable heuristic is stronger then we use it for this edge.
+ if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) {
+ auto ToDistribute = BranchProbability::getZero();
+ auto UnreachableProb = UR_TAKEN_PROB;
+ for (auto i : UnreachableIdxs)
+ if (UnreachableProb < BP[i]) {
+ ToDistribute += BP[i] - UnreachableProb;
+ BP[i] = UnreachableProb;
+ }
+
+ // If we modified the probability of some edges then we must distribute
+ // the difference between reachable blocks.
+ if (ToDistribute > BranchProbability::getZero()) {
+ BranchProbability PerEdge = ToDistribute / ReachableIdxs.size();
+ for (auto i : ReachableIdxs)
+ BP[i] += PerEdge;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ setEdgeProbability(BB, i, BP[i]);
return true;
}
@@ -251,7 +322,11 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0)
+ assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
+
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ if (isa<InvokeInst>(TI))
return false;
// Determine which successors are post-dominated by a cold block.
@@ -263,34 +338,8 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
else
NormalEdges.push_back(I.getSuccessorIndex());
- // If all successors are in the set of blocks post-dominated by cold calls,
- // this block is in the set post-dominated by cold calls.
- if (ColdEdges.size() == TI->getNumSuccessors())
- PostDominatedByColdCall.insert(BB);
- else {
- // Otherwise, if the block itself contains a cold function, add it to the
- // set of blocks postdominated by a cold call.
- assert(!PostDominatedByColdCall.count(BB));
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (CI->hasFnAttr(Attribute::Cold)) {
- PostDominatedByColdCall.insert(BB);
- break;
- }
- }
-
- if (auto *II = dyn_cast<InvokeInst>(TI)) {
- // If the terminator is an InvokeInst, consider only the normal destination
- // block.
- if (PostDominatedByColdCall.count(II->getNormalDest()))
- PostDominatedByColdCall.insert(BB);
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- return false;
- }
-
- // Skip probabilities if this block has a single successor.
- if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
+ // Skip probabilities if no cold edges.
+ if (ColdEdges.empty())
return false;
if (NormalEdges.empty()) {
@@ -410,7 +459,8 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
return true;
}
-bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
+bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
+ const TargetLibraryInfo *TLI) {
const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -433,8 +483,37 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
if (AndRHS->getUniqueInteger().isPowerOf2())
return false;
+ // Check if the LHS is the return value of a library function
+ LibFunc Func = NumLibFuncs;
+ if (TLI)
+ if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
+ if (Function *CalledFn = Call->getCalledFunction())
+ TLI->getLibFunc(*CalledFn, Func);
+
bool isProb;
- if (CV->isZero()) {
+ if (Func == LibFunc_strcasecmp ||
+ Func == LibFunc_strcmp ||
+ Func == LibFunc_strncasecmp ||
+ Func == LibFunc_strncmp ||
+ Func == LibFunc_memcmp) {
+ // strcmp and similar functions return zero, negative, or positive, if the
+ // first string is equal, less, or greater than the second. We consider it
+ // likely that the strings are not equal, so a comparison with zero is
+ // probably false, but also a comparison with any other number is also
+ // probably false given that what exactly is returned for nonzero values is
+ // not specified. Any kind of comparison other than equality we know
+ // nothing about.
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
+ } else if (CV->isZero()) {
switch (CI->getPredicate()) {
case CmpInst::ICMP_EQ:
// X == 0 -> Unlikely
@@ -459,7 +538,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
- } else if (CV->isAllOnesValue()) {
+ } else if (CV->isMinusOne()) {
switch (CI->getPredicate()) {
case CmpInst::ICMP_EQ:
// X == -1 -> Unlikely
@@ -660,7 +739,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
}
}
-void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
+void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
+ const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
@@ -671,17 +751,22 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
// the successors of a block iteratively.
for (auto BB : post_order(&F.getEntryBlock())) {
DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
- if (calcUnreachableHeuristics(BB))
+ updatePostDominatedByUnreachable(BB);
+ updatePostDominatedByColdCall(BB);
+    // If there are fewer than two successors, there is no point in setting probabilities.
+ if (BB->getTerminator()->getNumSuccessors() < 2)
continue;
if (calcMetadataWeights(BB))
continue;
+ if (calcUnreachableHeuristics(BB))
+ continue;
if (calcColdCallHeuristics(BB))
continue;
if (calcLoopBranchHeuristics(BB, LI))
continue;
if (calcPointerHeuristics(BB))
continue;
- if (calcZeroHeuristics(BB))
+ if (calcZeroHeuristics(BB, TLI))
continue;
if (calcFloatingPointHeuristics(BB))
continue;
@@ -695,12 +780,14 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesAll();
}
bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- BPI.calculate(F, LI);
+ const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ BPI.calculate(F, LI, &TLI);
return false;
}
@@ -715,7 +802,7 @@ AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
- BPI.calculate(F, AM.getResult<LoopAnalysis>(F));
+ BPI.calculate(F, AM.getResult<LoopAnalysis>(F), &AM.getResult<TargetLibraryAnalysis>(F));
return BPI;
}
diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index e48ff23..0de7ad9 100644
--- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -68,17 +68,6 @@ CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
: AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
CFLAndersAAResult::~CFLAndersAAResult() {}
-static const Function *parentFunctionOfValue(const Value *Val) {
- if (auto *Inst = dyn_cast<Instruction>(Val)) {
- auto *Bb = Inst->getParent();
- return Bb->getParent();
- }
-
- if (auto *Arg = dyn_cast<Argument>(Val))
- return Arg->getParent();
- return nullptr;
-}
-
namespace {
enum class MatchState : uint8_t {
@@ -307,7 +296,7 @@ class CFLAndersAAResult::FunctionInfo {
public:
FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
- const ReachabilitySet &, AliasAttrMap);
+ const ReachabilitySet &, const AliasAttrMap &);
bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
const AliasSummary &getAliasSummary() const { return Summary; }
@@ -470,7 +459,7 @@ static void populateExternalAttributes(
CFLAndersAAResult::FunctionInfo::FunctionInfo(
const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
- const ReachabilitySet &ReachSet, AliasAttrMap AMap) {
+ const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) {
populateAttrMap(AttrMap, AMap);
populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
populateAliasMap(AliasMap, ReachSet);
@@ -789,10 +778,10 @@ void CFLAndersAAResult::scan(const Function &Fn) {
// resize and invalidating the reference returned by operator[]
auto FunInfo = buildInfoFrom(Fn);
Cache[&Fn] = std::move(FunInfo);
- Handles.push_front(FunctionHandle(const_cast<Function *>(&Fn), this));
+ Handles.emplace_front(const_cast<Function *>(&Fn), this);
}
-void CFLAndersAAResult::evict(const Function &Fn) { Cache.erase(&Fn); }
+void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); }
const Optional<CFLAndersAAResult::FunctionInfo> &
CFLAndersAAResult::ensureCached(const Function &Fn) {
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index e526e0e..95874b8 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -16,7 +16,6 @@
#define LLVM_ANALYSIS_CFLGRAPH_H
#include "AliasAnalysisSummary.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -210,6 +209,11 @@ template <typename CFLAA> class CFLGraphBuilder {
void addDerefEdge(Value *From, Value *To, bool IsRead) {
assert(From != nullptr && To != nullptr);
+ // FIXME: This is subtly broken, due to how we model some instructions
+ // (e.g. extractvalue, extractelement) as loads. Since those take
+ // non-pointer operands, we'll entirely skip adding edges for those.
+ //
+ // addAssignEdge seems to have a similar issue with insertvalue, etc.
if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
return;
addNode(From);
@@ -400,8 +404,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// TODO: address other common library functions such as realloc(),
// strdup(),
// etc.
- if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
- isFreeCall(Inst, &TLI))
+ if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function
@@ -430,7 +433,7 @@ template <typename CFLAA> class CFLGraphBuilder {
if (Inst->getType()->isPointerTy()) {
auto *Fn = CS.getCalledFunction();
- if (Fn == nullptr || !Fn->doesNotAlias(0))
+ if (Fn == nullptr || !Fn->returnDoesNotAlias())
// No need to call addNode() since we've added Inst at the
// beginning of this function and we know it is not a global.
Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
@@ -541,6 +544,7 @@ template <typename CFLAA> class CFLGraphBuilder {
case Instruction::ExtractValue: {
auto *Ptr = CE->getOperand(0);
addLoadEdge(Ptr, CE);
+ break;
}
case Instruction::ShuffleVector: {
auto *From1 = CE->getOperand(0);
diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index dde24ef..adbdd82 100644
--- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -80,9 +80,6 @@ public:
const AliasSummary &getAliasSummary() const { return Summary; }
};
-/// Try to go from a Value* to a Function*. Never returns nullptr.
-static Optional<Function *> parentFunctionOfValue(Value *);
-
const StratifiedIndex StratifiedLink::SetSentinel =
std::numeric_limits<StratifiedIndex>::max();
@@ -91,19 +88,6 @@ const StratifiedIndex StratifiedLink::SetSentinel =
//===----------------------------------------------------------------------===//
/// Determines whether it would be pointless to add the given Value to our sets.
-static bool canSkipAddingToSets(Value *Val);
-
-static Optional<Function *> parentFunctionOfValue(Value *Val) {
- if (auto *Inst = dyn_cast<Instruction>(Val)) {
- auto *Bb = Inst->getParent();
- return Bb->getParent();
- }
-
- if (auto *Arg = dyn_cast<Argument>(Val))
- return Arg->getParent();
- return None;
-}
-
static bool canSkipAddingToSets(Value *Val) {
// Constants can share instances, which may falsely unify multiple
// sets, e.g. in
@@ -248,7 +232,7 @@ void CFLSteensAAResult::scan(Function *Fn) {
auto FunInfo = buildSetsFrom(Fn);
Cache[Fn] = std::move(FunInfo);
- Handles.push_front(FunctionHandle(Fn, this));
+ Handles.emplace_front(Fn, this);
}
void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
@@ -284,9 +268,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return NoAlias;
Function *Fn = nullptr;
- auto MaybeFnA = parentFunctionOfValue(ValA);
- auto MaybeFnB = parentFunctionOfValue(ValB);
- if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
+ Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
+ Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
+ if (!MaybeFnA && !MaybeFnB) {
// The only times this is known to happen are when globals + InlineAsm are
// involved
DEBUG(dbgs()
@@ -294,12 +278,12 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return MayAlias;
}
- if (MaybeFnA.hasValue()) {
- Fn = *MaybeFnA;
- assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
+ if (MaybeFnA) {
+ Fn = MaybeFnA;
+ assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
"Interprocedural queries not supported");
} else {
- Fn = *MaybeFnB;
+ Fn = MaybeFnB;
}
assert(Fn != nullptr);
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
index 054bdc4..74b5d79 100644
--- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -117,6 +117,7 @@ bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>();
// Ok, we have a graph, so we can propagate the invalidation down into it.
+ G->buildRefSCCs();
for (auto &RC : G->postorder_ref_sccs())
for (auto &C : RC) {
Optional<PreservedAnalyses> InnerPA;
@@ -195,18 +196,117 @@ FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C,
bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
CGSCCAnalysisManager::Invalidator &Inv) {
- for (LazyCallGraph::Node &N : C)
- FAM->invalidate(N.getFunction(), PA);
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy isn't marked as preserved, then even if the result remains
+ // valid, the key itself may no longer be valid, so we clear everything.
+ //
+ // Note that in order to preserve this proxy, a module pass must ensure that
+ // the FAM has been completely updated to handle the deletion of functions.
+ // Specifically, any FAM-cached results for those functions need to have been
+ // forcibly cleared. When preserved, this proxy will only invalidate results
+ // cached on functions *still in the module* at the end of the module pass.
+ auto PAC = PA.getChecker<FunctionAnalysisManagerCGSCCProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<LazyCallGraph::SCC>>()) {
+ for (LazyCallGraph::Node &N : C)
+ FAM->clear(N.getFunction());
- // This proxy doesn't need to handle invalidation itself. Instead, the
- // module-level CGSCC proxy handles it above by ensuring that if the
- // module-level FAM proxy becomes invalid the entire SCC layer, which
- // includes this proxy, is cleared.
+ return true;
+ }
+
+ // Directly check if the relevant set is preserved.
+ bool AreFunctionAnalysesPreserved =
+ PA.allAnalysesInSetPreserved<AllAnalysesOn<Function>>();
+
+ // Now walk all the functions to see if any inner analysis invalidation is
+ // necessary.
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+ Optional<PreservedAnalyses> FunctionPA;
+
+ // Check to see whether the preserved set needs to be pruned based on
+ // SCC-level analysis invalidation that triggers deferred invalidation
+ // registered with the outer analysis manager proxy for this function.
+ if (auto *OuterProxy =
+ FAM->getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F))
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first;
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ if (Inv.invalidate(OuterAnalysisID, C, PA)) {
+ if (!FunctionPA)
+ FunctionPA = PA;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ FunctionPA->abandon(InnerAnalysisID);
+ }
+ }
+
+ // Check if we needed a custom PA set, and if so we'll need to run the
+ // inner invalidation.
+ if (FunctionPA) {
+ FAM->invalidate(F, *FunctionPA);
+ continue;
+ }
+
+ // Otherwise we only need to do invalidation if the original PA set didn't
+ // preserve all function analyses.
+ if (!AreFunctionAnalysesPreserved)
+ FAM->invalidate(F, PA);
+ }
+
+ // Return false to indicate that this result is still a valid proxy.
return false;
}
} // End llvm namespace
+/// When a new SCC is created for the graph and there might be function
+/// analysis results cached for the functions now in that SCC two forms of
+/// updates are required.
+///
+/// First, a proxy from the SCC to the FunctionAnalysisManager needs to be
+/// created so that any subsequent invalidation events to the SCC are
+/// propagated to the function analysis results cached for functions within it.
+///
+/// Second, if any of the functions within the SCC have analysis results with
+/// outer analysis dependencies, then those dependencies would point to the
+/// *wrong* SCC's analysis result. We forcibly invalidate the necessary
+/// function analyses so that they don't retain stale handles.
+static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C,
+ LazyCallGraph &G,
+ CGSCCAnalysisManager &AM) {
+ // Get the relevant function analysis manager.
+ auto &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, G).getManager();
+
+ // Now walk the functions in this SCC and invalidate any function analysis
+ // results that might have outer dependencies on an SCC analysis.
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+
+ auto *OuterProxy =
+ FAM.getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F);
+ if (!OuterProxy)
+ // No outer analyses were queried, nothing to do.
+ continue;
+
+ // Forcibly abandon all the inner analyses with dependencies, but
+ // invalidate nothing else.
+ auto PA = PreservedAnalyses::all();
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ PA.abandon(InnerAnalysisID);
+ }
+
+ // Now invalidate anything we found.
+ FAM.invalidate(F, PA);
+ }
+}
+
namespace {
/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
@@ -235,7 +335,6 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
SCC *OldC = C;
- (void)OldC;
// Update the current SCC. Note that if we have new SCCs, this must actually
// change the SCC.
@@ -244,6 +343,26 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
C = &*NewSCCRange.begin();
assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+ // If we had a cached FAM proxy originally, we will want to create more of
+ // them for each SCC that was split off.
+ bool NeedFAMProxy =
+ AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*OldC) != nullptr;
+
+ // We need to propagate an invalidation call to all but the newly current SCC
+ // because the outer pass manager won't do that for us after splitting them.
+ // FIXME: We should accept a PreservedAnalysis from the CG updater so that if
+  // there are preserved analyses we can avoid invalidating them here for
+ // split-off SCCs.
+ // We know however that this will preserve any FAM proxy so go ahead and mark
+ // that.
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ AM.invalidate(*OldC, PA);
+
+ // Ensure the now-current SCC's function analyses are updated.
+ if (NeedFAMProxy)
+ updateNewSCCFunctionAnalyses(*C, G, AM);
+
for (SCC &NewC :
reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
assert(C != &NewC && "No need to re-visit the current SCC!");
@@ -251,6 +370,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
UR.CWorklist.insert(&NewC);
if (DebugLogging)
dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+
+ // Ensure new SCCs' function analyses are updated.
+ if (NeedFAMProxy)
+ updateNewSCCFunctionAnalyses(NewC, G, AM);
+
+ // Also propagate a normal invalidation to the new SCC as only the current
+ // will get one from the pass manager infrastructure.
+ AM.invalidate(NewC, PA);
}
return C;
}
@@ -273,9 +400,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// demoted edges.
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Constant *, 16> Visited;
- SmallPtrSet<Function *, 16> RetainedEdges;
- SmallSetVector<Function *, 4> PromotedRefTargets;
- SmallSetVector<Function *, 4> DemotedCallTargets;
+ SmallPtrSet<Node *, 16> RetainedEdges;
+ SmallSetVector<Node *, 4> PromotedRefTargets;
+ SmallSetVector<Node *, 4> DemotedCallTargets;
// First walk the function and handle all called functions. We do this first
// because if there is a single call edge, whether there are ref edges is
@@ -284,7 +411,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
- const Edge *E = N.lookup(*Callee);
+ Node &CalleeN = *G.lookup(*Callee);
+ Edge *E = N->lookup(CalleeN);
// FIXME: We should really handle adding new calls. While it will
// make downstream usage more complex, there is no fundamental
// limitation and it will allow passes within the CGSCC to be a bit
@@ -293,9 +421,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(E && "No function transformations should introduce *new* "
"call edges! Any new calls should be modeled as "
"promoted existing ref edges!");
- RetainedEdges.insert(Callee);
+ RetainedEdges.insert(&CalleeN);
if (!E->isCall())
- PromotedRefTargets.insert(Callee);
+ PromotedRefTargets.insert(&CalleeN);
}
// Now walk all references.
@@ -305,25 +433,31 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (Visited.insert(C).second)
Worklist.push_back(C);
- LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
- const Edge *E = N.lookup(Referee);
+ auto VisitRef = [&](Function &Referee) {
+ Node &RefereeN = *G.lookup(Referee);
+ Edge *E = N->lookup(RefereeN);
// FIXME: Similarly to new calls, we also currently preclude
// introducing new references. See above for details.
assert(E && "No function transformations should introduce *new* ref "
"edges! Any new ref edges would require IPO which "
"function passes aren't allowed to do!");
- RetainedEdges.insert(&Referee);
+ RetainedEdges.insert(&RefereeN);
if (E->isCall())
- DemotedCallTargets.insert(&Referee);
- });
+ DemotedCallTargets.insert(&RefereeN);
+ };
+ LazyCallGraph::visitReferences(Worklist, Visited, VisitRef);
+
+ // Include synthetic reference edges to known, defined lib functions.
+ for (auto *F : G.getLibFunctions())
+ VisitRef(*F);
// First remove all of the edges that are no longer present in this function.
// We have to build a list of dead targets first and then remove them as the
// data structures will all be invalidated by removing them.
SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
- for (Edge &E : N)
- if (!RetainedEdges.count(&E.getFunction()))
- DeadTargets.push_back({E.getNode(), E.getKind()});
+ for (Edge &E : *N)
+ if (!RetainedEdges.count(&E.getNode()))
+ DeadTargets.push_back({&E.getNode(), E.getKind()});
for (auto DeadTarget : DeadTargets) {
Node &TargetN = *DeadTarget.getPointer();
bool IsCall = DeadTarget.getInt() == Edge::Call;
@@ -346,14 +480,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// For separate SCCs this is trivial.
RC->switchTrivialInternalEdgeToRef(N, TargetN);
} else {
- // Otherwise we may end up re-structuring the call graph. First,
- // invalidate any SCC analyses. We have to do this before we split
- // functions into new SCCs and lose track of where their analyses are
- // cached.
- // FIXME: We should accept a more precise preserved set here. For
- // example, it might be possible to preserve some function analyses
- // even as the SCC structure is changed.
- AM.invalidate(*C, PreservedAnalyses::none());
// Now update the call graph.
C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
N, C, AM, UR, DebugLogging);
@@ -397,9 +523,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// Next demote all the call edges that are now ref edges. This helps make
// the SCCs small which should minimize the work below as we don't want to
// form cycles that this would break.
- for (Function *RefTarget : DemotedCallTargets) {
- Node &TargetN = *G.lookup(*RefTarget);
- SCC &TargetC = *G.lookupSCC(TargetN);
+ for (Node *RefTarget : DemotedCallTargets) {
+ SCC &TargetC = *G.lookupSCC(*RefTarget);
RefSCC &TargetRC = TargetC.getOuterRefSCC();
// The easy case is when the target RefSCC is not this RefSCC. This is
@@ -407,10 +532,10 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (&TargetRC != RC) {
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
- RC->switchOutgoingEdgeToRef(N, TargetN);
+ RC->switchOutgoingEdgeToRef(N, *RefTarget);
if (DebugLogging)
dbgs() << "Switch outgoing call edge to a ref edge from '" << N
- << "' to '" << TargetN << "'\n";
+ << "' to '" << *RefTarget << "'\n";
continue;
}
@@ -418,26 +543,18 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// some SCCs.
if (C != &TargetC) {
// For separate SCCs this is trivial.
- RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ RC->switchTrivialInternalEdgeToRef(N, *RefTarget);
continue;
}
- // Otherwise we may end up re-structuring the call graph. First, invalidate
- // any SCC analyses. We have to do this before we split functions into new
- // SCCs and lose track of where their analyses are cached.
- // FIXME: We should accept a more precise preserved set here. For example,
- // it might be possible to preserve some function analyses even as the SCC
- // structure is changed.
- AM.invalidate(*C, PreservedAnalyses::none());
// Now update the call graph.
- C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
- N, C, AM, UR, DebugLogging);
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N,
+ C, AM, UR, DebugLogging);
}
// Now promote ref edges into call edges.
- for (Function *CallTarget : PromotedRefTargets) {
- Node &TargetN = *G.lookup(*CallTarget);
- SCC &TargetC = *G.lookupSCC(TargetN);
+ for (Node *CallTarget : PromotedRefTargets) {
+ SCC &TargetC = *G.lookupSCC(*CallTarget);
RefSCC &TargetRC = TargetC.getOuterRefSCC();
// The easy case is when the target RefSCC is not this RefSCC. This is
@@ -445,38 +562,61 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (&TargetRC != RC) {
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
- RC->switchOutgoingEdgeToCall(N, TargetN);
+ RC->switchOutgoingEdgeToCall(N, *CallTarget);
if (DebugLogging)
dbgs() << "Switch outgoing ref edge to a call edge from '" << N
- << "' to '" << TargetN << "'\n";
+ << "' to '" << *CallTarget << "'\n";
continue;
}
if (DebugLogging)
dbgs() << "Switch an internal ref edge to a call edge from '" << N
- << "' to '" << TargetN << "'\n";
+ << "' to '" << *CallTarget << "'\n";
// Otherwise we are switching an internal ref edge to a call edge. This
// may merge away some SCCs, and we add those to the UpdateResult. We also
// need to make sure to update the worklist in the event SCCs have moved
- // before the current one in the post-order sequence.
+ // before the current one in the post-order sequence.
+ bool HasFunctionAnalysisProxy = false;
auto InitialSCCIndex = RC->find(*C) - RC->begin();
- auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN);
- if (!InvalidatedSCCs.empty()) {
+ bool FormedCycle = RC->switchInternalEdgeToCall(
+ N, *CallTarget, [&](ArrayRef<SCC *> MergedSCCs) {
+ for (SCC *MergedC : MergedSCCs) {
+ assert(MergedC != &TargetC && "Cannot merge away the target SCC!");
+
+ HasFunctionAnalysisProxy |=
+ AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(
+ *MergedC) != nullptr;
+
+ // Mark that this SCC will no longer be valid.
+ UR.InvalidatedSCCs.insert(MergedC);
+
+ // FIXME: We should really do a 'clear' here to forcibly release
+ // memory, but we don't have a good way of doing that and
+ // preserving the function analyses.
+ auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ AM.invalidate(*MergedC, PA);
+ }
+ });
+
+ // If we formed a cycle by creating this call, we need to update more data
+ // structures.
+ if (FormedCycle) {
C = &TargetC;
assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
- // Any analyses cached for this SCC are no longer precise as the shape
- // has changed by introducing this cycle.
- AM.invalidate(*C, PreservedAnalyses::none());
-
- for (SCC *InvalidatedC : InvalidatedSCCs) {
- assert(InvalidatedC != C && "Cannot invalidate the current SCC!");
- UR.InvalidatedSCCs.insert(InvalidatedC);
+ // If one of the invalidated SCCs had a cached proxy to a function
+ // analysis manager, we need to create a proxy in the new current SCC as
+ // the invalidated SCCs had their functions moved.
+ if (HasFunctionAnalysisProxy)
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G);
- // Also clear any cached analyses for the SCCs that are dead. This
- // isn't really necessary for correctness but can release memory.
- AM.clear(*InvalidatedC);
- }
+ // Any analyses cached for this SCC are no longer precise as the shape
+ // has changed by introducing this cycle. However, we have taken care to
+ // update the proxies so it remains valid.
+ auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ AM.invalidate(*C, PA);
}
auto NewSCCIndex = RC->find(*C) - RC->begin();
if (InitialSCCIndex < NewSCCIndex) {
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index 458b7bf..ff5242f 100644
--- a/contrib/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -21,23 +21,18 @@ using namespace llvm;
//
CallGraph::CallGraph(Module &M)
- : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
+ : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)),
CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
// Add every function to the call graph.
for (Function &F : M)
addToCallGraph(&F);
-
- // If we didn't find a main function, use the external call graph node
- if (!Root)
- Root = ExternalCallingNode;
}
CallGraph::CallGraph(CallGraph &&Arg)
- : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root),
+ : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)),
ExternalCallingNode(Arg.ExternalCallingNode),
CallsExternalNode(std::move(Arg.CallsExternalNode)) {
Arg.FunctionMap.clear();
- Arg.Root = nullptr;
Arg.ExternalCallingNode = nullptr;
}
@@ -57,21 +52,9 @@ CallGraph::~CallGraph() {
void CallGraph::addToCallGraph(Function *F) {
CallGraphNode *Node = getOrInsertFunction(F);
- // If this function has external linkage, anything could call it.
- if (!F->hasLocalLinkage()) {
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // Found the entry point?
- if (F->getName() == "main") {
- if (Root) // Found multiple external mains? Don't pick one.
- Root = ExternalCallingNode;
- else
- Root = Node; // Found a main, keep track of it!
- }
- }
-
- // If this function has its address taken, anything could call it.
- if (F->hasAddressTaken())
+ // If this function has external linkage or has its address taken, anything
+ // could call it.
+ if (!F->hasLocalLinkage() || F->hasAddressTaken())
ExternalCallingNode->addCalledFunction(CallSite(), Node);
// If this function is not defined in this translation unit, it could call
@@ -96,13 +79,6 @@ void CallGraph::addToCallGraph(Function *F) {
}
void CallGraph::print(raw_ostream &OS) const {
- OS << "CallGraph Root is: ";
- if (Function *F = Root->getFunction())
- OS << F->getName() << "\n";
- else {
- OS << "<<null function: 0x" << Root << ">>\n";
- }
-
// Print in a deterministic order by sorting CallGraphNodes by name. We do
// this here to avoid slowing down the non-printing fast path.
@@ -125,8 +101,9 @@ void CallGraph::print(raw_ostream &OS) const {
CN->print(OS);
}
-LLVM_DUMP_METHOD
-void CallGraph::dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); }
+#endif
// removeFunctionFromModule - Unlink the function from this module, returning
// it. Because this removes the function from the module, the call graph node
@@ -194,8 +171,9 @@ void CallGraphNode::print(raw_ostream &OS) const {
OS << '\n';
}
-LLVM_DUMP_METHOD
-void CallGraphNode::dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); }
+#endif
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
@@ -307,8 +285,10 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
G->print(OS);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
+#endif
namespace {
struct CallGraphPrinterLegacyPass : public ModulePass {
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 9cef781..facda24 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -204,7 +204,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// Get the set of call sites currently in the function.
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
// If this call site is null, then the function pass deleted the call
- // entirely and the WeakVH nulled it out.
+ // entirely and the WeakTrackingVH nulled it out.
if (!I->first ||
// If we've already seen this call site, then the FunctionPass RAUW'd
// one call with another, which resulted in two "uses" in the edge
@@ -347,7 +347,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
DevirtualizedCall = true;
// After scanning this function, if we still have entries in callsites, then
- // they are dangling pointers. WeakVH should save us for this, so abort if
+ // they are dangling pointers. WeakTrackingVH should save us for this, so
+ // abort if
// this happens.
assert(CallSites.empty() && "Dangling pointers found in call sites map");
@@ -476,10 +477,8 @@ bool CGPassManager::runOnModule(Module &M) {
if (DevirtualizedCall)
DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
<< " times, due to -max-cg-scc-iterations\n");
-
- if (Iteration > MaxSCCIterations)
- MaxSCCIterations = Iteration;
-
+
+ MaxSCCIterations.updateMax(Iteration);
}
Changed |= doFinalization(CG);
return Changed;
@@ -609,16 +608,28 @@ namespace {
}
bool runOnSCC(CallGraphSCC &SCC) override {
- Out << Banner;
+ bool BannerPrinted = false;
+ auto PrintBannerOnce = [&] () {
+ if (BannerPrinted)
+ return;
+ Out << Banner;
+ BannerPrinted = true;
+ };
for (CallGraphNode *CGN : SCC) {
- if (CGN->getFunction()) {
- if (isFunctionInPrintList(CGN->getFunction()->getName()))
- CGN->getFunction()->print(Out);
- } else
+ if (Function *F = CGN->getFunction()) {
+ if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) {
+ PrintBannerOnce();
+ F->print(Out);
+ }
+ } else if (llvm::isFunctionInPrintList("*")) {
+ PrintBannerOnce();
Out << "\nPrinting <null> Function\n";
+ }
}
return false;
}
+
+ StringRef getPassName() const override { return "Print CallGraph IR"; }
};
} // end anonymous namespace.
diff --git a/contrib/llvm/lib/Analysis/CallPrinter.cpp b/contrib/llvm/lib/Analysis/CallPrinter.cpp
index af942e9..e7017e7 100644
--- a/contrib/llvm/lib/Analysis/CallPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CallPrinter.cpp
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallPrinter.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 9862c3c..3b0026b 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -16,11 +16,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -94,8 +94,8 @@ namespace {
// guarantee that 'I' never reaches 'BeforeHere' through a back-edge or
// by its successors, i.e, prune if:
//
- // (1) BB is an entry block or have no sucessors.
- // (2) There's no path coming back through BB sucessors.
+ // (1) BB is an entry block or have no successors.
+ // (2) There's no path coming back through BB successors.
if (BB == &BB->getParent()->getEntryBlock() ||
!BB->getTerminator()->getNumSuccessors())
return true;
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index bdffdd8..e4d9292 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index 7386727..0f5ec3f 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -22,8 +22,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
@@ -42,6 +42,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
@@ -686,25 +687,21 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
// bits.
if (Opc == Instruction::And) {
- unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
- APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
- APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
- computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
- computeKnownBits(Op1, KnownZero1, KnownOne1, DL);
- if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
+ KnownBits Known0 = computeKnownBits(Op0, DL);
+ KnownBits Known1 = computeKnownBits(Op1, DL);
+ if ((Known1.One | Known0.Zero).isAllOnesValue()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
}
- if ((KnownOne0 | KnownZero1).isAllOnesValue()) {
+ if ((Known0.One | Known1.Zero).isAllOnesValue()) {
// All the bits of Op1 that the 'and' could be masking are already zero.
return Op1;
}
- APInt KnownZero = KnownZero0 | KnownZero1;
- APInt KnownOne = KnownOne0 & KnownOne1;
- if ((KnownZero | KnownOne).isAllOnesValue()) {
- return ConstantInt::get(Op0->getType(), KnownOne);
- }
+ Known0.Zero |= Known1.Zero;
+ Known0.One &= Known1.One;
+ if (Known0.isConstant())
+ return ConstantInt::get(Op0->getType(), Known0.getConstant());
}
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
@@ -1018,9 +1015,11 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
case Instruction::ICmp:
case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Call:
- if (auto *F = dyn_cast<Function>(Ops.back()))
- if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
+ if (auto *F = dyn_cast<Function>(Ops.back())) {
+ ImmutableCallSite CS(cast<CallInst>(InstOrCE));
+ if (canConstantFoldCallTo(CS, F))
+ return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI);
+ }
return nullptr;
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
@@ -1058,8 +1057,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
if (It == FoldedOps.end()) {
if (auto *FoldedC =
ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) {
- NewC = FoldedC;
FoldedOps.insert({NewC, FoldedC});
+ NewC = FoldedC;
} else {
FoldedOps.insert({NewC, NewC});
}
@@ -1173,7 +1172,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp null, (inttoptr x) -> icmp 0, x
// fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp 0, (ptrtoint x) -> icmp null, x
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
@@ -1243,6 +1244,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
}
+ } else if (isa<ConstantExpr>(Ops1)) {
+ // If RHS is a constant expression, but the left side isn't, swap the
+ // operands and try again.
+ Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate);
+ return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
}
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
@@ -1352,7 +1358,9 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// Constant Folding for Calls
//
-bool llvm::canConstantFoldCallTo(const Function *F) {
+bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
+ if (CS.isNoBuiltin())
+ return false;
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
case Intrinsic::minnum:
@@ -1401,7 +1409,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
return true;
default:
return false;
- case 0: break;
+ case Intrinsic::not_intrinsic: break;
}
if (!F->hasName())
@@ -1438,6 +1446,36 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
case 't':
return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
+ case '_':
+
+ // Check for various function names that get used for the math functions
+ // when the header files are preprocessed with the macro
+ // __FINITE_MATH_ONLY__ enabled.
+ // The '12' here is the length of the shortest name that can match.
+ // We need to check the size before looking at Name[1] and Name[2]
+ // so we may as well check a limit that will eliminate mismatches.
+ if (Name.size() < 12 || Name[1] != '_')
+ return false;
+ switch (Name[2]) {
+ default:
+ return false;
+ case 'a':
+ return Name == "__acos_finite" || Name == "__acosf_finite" ||
+ Name == "__asin_finite" || Name == "__asinf_finite" ||
+ Name == "__atan2_finite" || Name == "__atan2f_finite";
+ case 'c':
+ return Name == "__cosh_finite" || Name == "__coshf_finite";
+ case 'e':
+ return Name == "__exp_finite" || Name == "__expf_finite" ||
+ Name == "__exp2_finite" || Name == "__exp2f_finite";
+ case 'l':
+ return Name == "__log_finite" || Name == "__logf_finite" ||
+ Name == "__log10_finite" || Name == "__log10f_finite";
+ case 'p':
+ return Name == "__pow_finite" || Name == "__powf_finite";
+ case 's':
+ return Name == "__sinh_finite" || Name == "__sinhf_finite";
+ }
}
}
@@ -1518,9 +1556,9 @@ Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
bool isExact = false;
APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
: APFloat::rmNearestTiesToEven;
- APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
- /*isSigned=*/true, mode,
- &isExact);
+ APFloat::opStatus status =
+ Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
+ /*isSigned=*/true, mode, &isExact);
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
@@ -1550,6 +1588,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
// cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
if (IntrinsicID == Intrinsic::cos)
return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::bswap ||
+ IntrinsicID == Intrinsic::bitreverse)
+ return Operands[0];
}
if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
if (IntrinsicID == Intrinsic::convert_to_fp16) {
@@ -1630,94 +1671,108 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return ConstantFoldFP(sin, V, Ty);
case Intrinsic::cos:
return ConstantFoldFP(cos, V, Ty);
+ case Intrinsic::sqrt:
+ return ConstantFoldFP(sqrt, V, Ty);
}
if (!TLI)
return nullptr;
- switch (Name[0]) {
+ char NameKeyChar = Name[0];
+ if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
+ NameKeyChar = Name[2];
+
+ switch (NameKeyChar) {
case 'a':
- if ((Name == "acos" && TLI->has(LibFunc::acos)) ||
- (Name == "acosf" && TLI->has(LibFunc::acosf)))
+ if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
+ (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
+ (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
return ConstantFoldFP(acos, V, Ty);
- else if ((Name == "asin" && TLI->has(LibFunc::asin)) ||
- (Name == "asinf" && TLI->has(LibFunc::asinf)))
+ else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
+ (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
+ (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
return ConstantFoldFP(asin, V, Ty);
- else if ((Name == "atan" && TLI->has(LibFunc::atan)) ||
- (Name == "atanf" && TLI->has(LibFunc::atanf)))
+ else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc_atanf)))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if ((Name == "ceil" && TLI->has(LibFunc::ceil)) ||
- (Name == "ceilf" && TLI->has(LibFunc::ceilf)))
+ if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
return ConstantFoldFP(ceil, V, Ty);
- else if ((Name == "cos" && TLI->has(LibFunc::cos)) ||
- (Name == "cosf" && TLI->has(LibFunc::cosf)))
+ else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc_cosf)))
return ConstantFoldFP(cos, V, Ty);
- else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc::coshf)))
+ else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
+ (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
+ (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
return ConstantFoldFP(cosh, V, Ty);
break;
case 'e':
- if ((Name == "exp" && TLI->has(LibFunc::exp)) ||
- (Name == "expf" && TLI->has(LibFunc::expf)))
+ if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
+ (Name == "expf" && TLI->has(LibFunc_expf)) ||
+ (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
+ (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
return ConstantFoldFP(exp, V, Ty);
- if ((Name == "exp2" && TLI->has(LibFunc::exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc::exp2f)))
+ if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
+ (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
+ (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
break;
case 'f':
- if ((Name == "fabs" && TLI->has(LibFunc::fabs)) ||
- (Name == "fabsf" && TLI->has(LibFunc::fabsf)))
+ if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
return ConstantFoldFP(fabs, V, Ty);
- else if ((Name == "floor" && TLI->has(LibFunc::floor)) ||
- (Name == "floorf" && TLI->has(LibFunc::floorf)))
+ else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc_floorf)))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc::logf)))
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
+ (Name == "__log_finite" && V > 0 &&
+ TLI->has(LibFunc_log_finite)) ||
+ (Name == "__logf_finite" && V > 0 &&
+ TLI->has(LibFunc_logf_finite)))
return ConstantFoldFP(log, V, Ty);
- else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f)))
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
+ (Name == "__log10_finite" && V > 0 &&
+ TLI->has(LibFunc_log10_finite)) ||
+ (Name == "__log10f_finite" && V > 0 &&
+ TLI->has(LibFunc_log10f_finite)))
return ConstantFoldFP(log10, V, Ty);
- else if (IntrinsicID == Intrinsic::sqrt &&
- (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
- if (V >= -0.0)
- return ConstantFoldFP(sqrt, V, Ty);
- else {
- // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which
- // all guarantee or favor returning NaN - the square root of a
- // negative number is not defined for the LLVM sqrt intrinsic.
- // This is because the intrinsic should only be emitted in place of
- // libm's sqrt function when using "no-nans-fp-math".
- return UndefValue::get(Ty);
- }
- }
break;
case 'r':
- if ((Name == "round" && TLI->has(LibFunc::round)) ||
- (Name == "roundf" && TLI->has(LibFunc::roundf)))
+ if ((Name == "round" && TLI->has(LibFunc_round)) ||
+ (Name == "roundf" && TLI->has(LibFunc_roundf)))
return ConstantFoldFP(round, V, Ty);
+ break;
case 's':
- if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
- (Name == "sinf" && TLI->has(LibFunc::sinf)))
+ if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc_sinf)))
return ConstantFoldFP(sin, V, Ty);
- else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc::sinhf)))
+ else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
+ (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
+ (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
return ConstantFoldFP(sinh, V, Ty);
- else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) ||
- (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)))
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
return ConstantFoldFP(sqrt, V, Ty);
break;
case 't':
- if ((Name == "tan" && TLI->has(LibFunc::tan)) ||
- (Name == "tanf" && TLI->has(LibFunc::tanf)))
+ if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc_tanf)))
return ConstantFoldFP(tan, V, Ty);
- else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) ||
- (Name == "tanhf" && TLI->has(LibFunc::tanhf)))
+ else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1767,6 +1822,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty);
+ break;
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvttsd2si:
@@ -1775,15 +1831,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty);
+ break;
}
}
- if (isa<UndefValue>(Operands[0])) {
- if (IntrinsicID == Intrinsic::bswap)
- return Operands[0];
- return nullptr;
- }
-
return nullptr;
}
@@ -1822,14 +1873,18 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (!TLI)
return nullptr;
- if ((Name == "pow" && TLI->has(LibFunc::pow)) ||
- (Name == "powf" && TLI->has(LibFunc::powf)))
+ if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
+ (Name == "powf" && TLI->has(LibFunc_powf)) ||
+ (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
+ (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if ((Name == "fmod" && TLI->has(LibFunc::fmod)) ||
- (Name == "fmodf" && TLI->has(LibFunc::fmodf)))
+ if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if ((Name == "atan2" && TLI->has(LibFunc::atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc::atan2f)))
+ if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
+ (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
+ (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
@@ -1980,6 +2035,14 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
+ // These intrinsics use a scalar type for their second argument.
+ if (J == 1 &&
+ (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz ||
+ IntrinsicID == Intrinsic::powi)) {
+ Lane[J] = Operands[J];
+ continue;
+ }
+
Constant *Agg = Operands[J]->getAggregateElement(I);
if (!Agg)
return nullptr;
@@ -2000,8 +2063,11 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
} // end anonymous namespace
Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
+ ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
+ if (CS.isNoBuiltin())
+ return nullptr;
if (!F->hasName())
return nullptr;
StringRef Name = F->getName();
@@ -2018,11 +2084,13 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
// FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
// (and to some extent ConstantFoldScalarCall).
+ if (CS.isNoBuiltin())
+ return false;
Function *F = CS.getCalledFunction();
if (!F)
return false;
- LibFunc::Func Func;
+ LibFunc Func;
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
@@ -2030,20 +2098,20 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
- case LibFunc::logl:
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::log2l:
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log10l:
- case LibFunc::log10:
- case LibFunc::log10f:
+ case LibFunc_logl:
+ case LibFunc_log:
+ case LibFunc_logf:
+ case LibFunc_log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log10l:
+ case LibFunc_log10:
+ case LibFunc_log10f:
return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
- case LibFunc::expl:
- case LibFunc::exp:
- case LibFunc::expf:
+ case LibFunc_expl:
+ case LibFunc_exp:
+ case LibFunc_expf:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan &&
@@ -2053,9 +2121,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan;
break;
- case LibFunc::exp2l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan &&
@@ -2065,17 +2133,17 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan;
break;
- case LibFunc::sinl:
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::cosl:
- case LibFunc::cos:
- case LibFunc::cosf:
+ case LibFunc_sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
return !Op.isInfinity();
- case LibFunc::tanl:
- case LibFunc::tan:
- case LibFunc::tanf: {
+ case LibFunc_tanl:
+ case LibFunc_tan:
+ case LibFunc_tanf: {
// FIXME: Stop using the host math library.
// FIXME: The computation isn't done in the right precision.
Type *Ty = OpC->getType();
@@ -2086,23 +2154,23 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
break;
}
- case LibFunc::asinl:
- case LibFunc::asin:
- case LibFunc::asinf:
- case LibFunc::acosl:
- case LibFunc::acos:
- case LibFunc::acosf:
+ case LibFunc_asinl:
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_acosl:
+ case LibFunc_acos:
+ case LibFunc_acosf:
return Op.compare(APFloat(Op.getSemantics(), "-1")) !=
APFloat::cmpLessThan &&
Op.compare(APFloat(Op.getSemantics(), "1")) !=
APFloat::cmpGreaterThan;
- case LibFunc::sinh:
- case LibFunc::cosh:
- case LibFunc::sinhf:
- case LibFunc::coshf:
- case LibFunc::sinhl:
- case LibFunc::coshl:
+ case LibFunc_sinh:
+ case LibFunc_cosh:
+ case LibFunc_sinhf:
+ case LibFunc_coshf:
+ case LibFunc_sinhl:
+ case LibFunc_coshl:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan &&
@@ -2112,9 +2180,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan;
break;
- case LibFunc::sqrtl:
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
return Op.isNaN() || Op.isZero() || !Op.isNegative();
// FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
@@ -2133,9 +2201,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
const APFloat &Op1 = Op1C->getValueAPF();
switch (Func) {
- case LibFunc::powl:
- case LibFunc::pow:
- case LibFunc::powf: {
+ case LibFunc_powl:
+ case LibFunc_pow:
+ case LibFunc_powf: {
// FIXME: Stop using the host math library.
// FIXME: The computation isn't done in the right precision.
Type *Ty = Op0C->getType();
@@ -2149,9 +2217,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
break;
}
- case LibFunc::fmodl:
- case LibFunc::fmod:
- case LibFunc::fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
return Op0.isNaN() || Op1.isNaN() ||
(!Op0.isInfinity() && !Op1.isZero());
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index 6b773979..32bfea5 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -447,25 +447,25 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
Type *CondTy = SI->getCondition()->getType();
- return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+ return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
- return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+ return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
}
case Instruction::Store: {
const StoreInst *SI = cast<StoreInst>(I);
Type *ValTy = SI->getValueOperand()->getType();
return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace());
+ SI->getAlignment(),
+ SI->getPointerAddressSpace(), I);
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(I);
return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace());
+ LI->getAlignment(),
+ LI->getPointerAddressSpace(), I);
}
case Instruction::ZExt:
case Instruction::SExt:
@@ -481,7 +481,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
case Instruction::BitCast:
case Instruction::AddrSpaceCast: {
Type *SrcTy = I->getOperand(0)->getType();
- return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
+ return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
}
case Instruction::ExtractElement: {
const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
@@ -542,9 +542,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Value *, 4> Args;
- for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
- Args.push_back(II->getArgOperand(J));
+ SmallVector<Value *, 4> Args(II->arg_operands());
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index 688c1db..9c53f91 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -72,8 +73,7 @@ static bool isAlwaysLive(Instruction *I) {
void DemandedBits::determineLiveOperandBits(
const Instruction *UserI, const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2) {
+ const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) {
unsigned BitWidth = AB.getBitWidth();
// We're called once per operand, but for some instructions, we need to
@@ -85,16 +85,12 @@ void DemandedBits::determineLiveOperandBits(
auto ComputeKnownBits =
[&](unsigned BitWidth, const Value *V1, const Value *V2) {
const DataLayout &DL = I->getModule()->getDataLayout();
- KnownZero = APInt(BitWidth, 0);
- KnownOne = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
- &AC, UserI, &DT);
+ Known = KnownBits(BitWidth);
+ computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
if (V2) {
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
- 0, &AC, UserI, &DT);
+ Known2 = KnownBits(BitWidth);
+ computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT);
}
};
@@ -110,6 +106,11 @@ void DemandedBits::determineLiveOperandBits(
// the output.
AB = AOut.byteSwap();
break;
+ case Intrinsic::bitreverse:
+ // The alive bits of the input are the reversed alive bits of
+ // the output.
+ AB = AOut.reverseBits();
+ break;
case Intrinsic::ctlz:
if (OperandNo == 0) {
// We need some output bits, so we need all bits of the
@@ -117,7 +118,7 @@ void DemandedBits::determineLiveOperandBits(
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ std::min(BitWidth, Known.countMaxLeadingZeros()+1));
}
break;
case Intrinsic::cttz:
@@ -127,7 +128,7 @@ void DemandedBits::determineLiveOperandBits(
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ std::min(BitWidth, Known.countMaxTrailingZeros()+1));
}
break;
}
@@ -142,9 +143,8 @@ void DemandedBits::determineLiveOperandBits(
break;
case Instruction::Shl:
if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.lshr(ShiftAmt);
// If the shift is nuw/nsw, then the high bits are not dead
@@ -158,9 +158,8 @@ void DemandedBits::determineLiveOperandBits(
break;
case Instruction::LShr:
if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// If the shift is exact, then the low bits are not dead
@@ -171,16 +170,15 @@ void DemandedBits::determineLiveOperandBits(
break;
case Instruction::AShr:
if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// Because the high input bit is replicated into the
// high-order bits of the result, if we need any of those
// bits, then we must keep the highest input bit.
if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
.getBoolValue())
- AB.setBit(BitWidth-1);
+ AB.setSignBit();
// If the shift is exact, then the low bits are not dead
// (they must be zero).
@@ -197,11 +195,11 @@ void DemandedBits::determineLiveOperandBits(
// dead).
if (OperandNo == 0) {
ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownZero2;
+ AB &= ~Known2.Zero;
} else {
if (!isa<Instruction>(UserI->getOperand(0)))
ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownZero & ~KnownZero2);
+ AB &= ~(Known.Zero & ~Known2.Zero);
}
break;
case Instruction::Or:
@@ -213,11 +211,11 @@ void DemandedBits::determineLiveOperandBits(
// dead).
if (OperandNo == 0) {
ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownOne2;
+ AB &= ~Known2.One;
} else {
if (!isa<Instruction>(UserI->getOperand(0)))
ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownOne & ~KnownOne2);
+ AB &= ~(Known.One & ~Known2.One);
}
break;
case Instruction::Xor:
@@ -238,7 +236,7 @@ void DemandedBits::determineLiveOperandBits(
if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
AOut.getBitWidth() - BitWidth))
.getBoolValue())
- AB.setBit(BitWidth-1);
+ AB.setSignBit();
break;
case Instruction::Select:
if (OperandNo != 0)
@@ -315,7 +313,7 @@ void DemandedBits::performAnalysis() {
if (!UserI->getType()->isIntegerTy())
Visited.insert(UserI);
- APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+ KnownBits Known, Known2;
// Compute the set of alive bits for each operand. These are anded into the
// existing set, if any, and if that changes the set of alive bits, the
// operand is added to the work-list.
@@ -332,8 +330,7 @@ void DemandedBits::performAnalysis() {
// Bits of each operand that are used to compute alive bits of the
// output are alive, all others are dead.
determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- KnownZero, KnownOne,
- KnownZero2, KnownOne2);
+ Known, Known2);
}
// If we've added to the set of alive bits (or the operand has not
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index a332a07..34eccc0 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -385,9 +385,9 @@ void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
Kind = Any;
}
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debugging purposes. Dumps the constraint out to OS.
-void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
+LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
if (isEmpty())
OS << " Empty\n";
else if (isAny())
@@ -403,6 +403,7 @@ void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
else
llvm_unreachable("unknown constraint type in Constraint::dump");
}
+#endif
// Updates X with the intersection
@@ -2983,7 +2984,7 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
SmallVectorImpl<Constraint> &Constraints,
bool &Consistent) {
bool Result = false;
- for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
+ for (unsigned LI : Loops.set_bits()) {
DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
DEBUG(Constraints[LI].dump(dbgs()));
if (Constraints[LI].isDistance())
@@ -3265,7 +3266,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// For debugging purposes, dump a small bit vector to dbgs().
static void dumpSmallBitVector(SmallBitVector &BV) {
dbgs() << "{";
- for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
+ for (unsigned VI : BV.set_bits()) {
dbgs() << VI;
if (BV.find_next(VI) >= 0)
dbgs() << ' ';
@@ -3341,7 +3342,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
- (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) &&
+ isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV);
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
@@ -3370,7 +3372,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (Delinearize && CommonLevels > 1) {
if (tryDelinearize(Src, Dst, Pair)) {
- DEBUG(dbgs() << " delinerized GEP\n");
+ DEBUG(dbgs() << " delinearized GEP\n");
Pairs = Pair.size();
}
}
@@ -3505,7 +3507,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
NewConstraint.setAny(SE);
// test separable subscripts
- for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ for (unsigned SI : Separable.set_bits()) {
DEBUG(dbgs() << "testing subscript " << SI);
switch (Pair[SI].Classification) {
case Subscript::ZIV:
@@ -3544,14 +3546,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
- for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ for (unsigned SI : Coupled.set_bits()) {
DEBUG(dbgs() << "testing subscript group " << SI << " { ");
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
SmallVector<Subscript *, 4> PairsInGroup;
- for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ for (unsigned SJ : Group.set_bits()) {
DEBUG(dbgs() << SJ << " ");
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
@@ -3563,7 +3565,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << "}\n");
while (Sivs.any()) {
bool Changed = false;
- for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ for (unsigned SJ : Sivs.set_bits()) {
DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
@@ -3587,7 +3589,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << " propagating\n");
DEBUG(dbgs() << "\tMivs = ");
DEBUG(dumpSmallBitVector(Mivs));
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
@@ -3621,7 +3623,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
}
// test & propagate remaining RDIVs
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::RDIV) {
DEBUG(dbgs() << "RDIV test\n");
if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
@@ -3634,7 +3636,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// test remaining MIVs
// This code is temporary.
// Better to somehow test all remaining subscripts simultaneously.
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::MIV) {
DEBUG(dbgs() << "MIV test\n");
if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
@@ -3646,9 +3648,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// update Result.DV from constraint vector
DEBUG(dbgs() << " updating\n");
- for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
- SJ = ConstrainedLevels.find_next(SJ)) {
- if (SJ > (int)CommonLevels)
+ for (unsigned SJ : ConstrainedLevels.set_bits()) {
+ if (SJ > CommonLevels)
break;
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
@@ -3796,7 +3797,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
if (Delinearize && CommonLevels > 1) {
if (tryDelinearize(Src, Dst, Pair)) {
- DEBUG(dbgs() << " delinerized GEP\n");
+ DEBUG(dbgs() << " delinearized GEP\n");
Pairs = Pair.size();
}
}
@@ -3858,7 +3859,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
NewConstraint.setAny(SE);
// test separable subscripts
- for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
+ for (unsigned SI : Separable.set_bits()) {
switch (Pair[SI].Classification) {
case Subscript::SIV: {
unsigned Level;
@@ -3885,12 +3886,12 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
- for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
+ for (unsigned SI : Coupled.set_bits()) {
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
- for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
+ for (unsigned SJ : Group.set_bits()) {
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
else
@@ -3898,7 +3899,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
}
while (Sivs.any()) {
bool Changed = false;
- for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
+ for (unsigned SJ : Sivs.set_bits()) {
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
const SCEV *SplitIter = nullptr;
@@ -3913,7 +3914,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
}
if (Changed) {
// propagate, possibly creating new SIVs and ZIVs
- for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
+ for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
Pair[SJ].Loops, Constraints, Result.Consistent)) {
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 1b36569..2d39a0b 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -241,7 +241,7 @@ void DivergencePropagator::exploreDataDependency(Value *V) {
// Follow def-use chains of V.
for (User *U : V->users()) {
Instruction *UserInst = cast<Instruction>(U);
- if (DV.insert(UserInst).second)
+ if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
Worklist.push_back(UserInst);
}
}
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
index 7acfb41..8abc0e7 100644
--- a/contrib/llvm/lib/Analysis/DomPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -80,6 +80,22 @@ struct DOTGraphTraits<PostDominatorTree*>
};
}
+void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "DomTree dump not available, build with DEBUG\n";
+#endif // NDEBUG
+}
+
+void DominatorTree::viewGraph() {
+#ifndef NDEBUG
+ this->viewGraph("domtree", "Dominator Tree for function");
+#else
+ errs() << "DomTree dump not available, build with DEBUG\n";
+#endif // NDEBUG
+}
+
namespace {
struct DominatorTreeWrapperPassAnalysisGraphTraits {
static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
index 15856c3..c08c6cf 100644
--- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -14,7 +14,8 @@
using namespace llvm;
namespace llvm {
-template class DominanceFrontierBase<BasicBlock>;
+template class DominanceFrontierBase<BasicBlock, false>;
+template class DominanceFrontierBase<BasicBlock, true>;
template class ForwardDominanceFrontierBase<BasicBlock>;
}
@@ -56,6 +57,16 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const {
}
#endif
+/// Handle invalidation explicitly.
+bool DominanceFrontier::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<DominanceFrontierAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
AnalysisKey DominanceFrontierAnalysis::Key;
DominanceFrontier DominanceFrontierAnalysis::run(Function &F,
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
index ebf0a37..b12ae98 100644
--- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -27,8 +27,10 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
return StringSwitch<EHPersonality>(F->getName())
.Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
.Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX)
.Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj)
.Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__gcc_personality_seh0",EHPersonality::GNU_C)
.Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj)
.Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
.Case("_except_handler3", EHPersonality::MSVC_X86SEH)
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index 33f00cb..4ef0233 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -475,7 +475,9 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
const std::vector<CallGraphNode *> &SCC = *I;
assert(!SCC.empty() && "SCC with no functions?");
- if (!SCC[0]->getFunction() || !SCC[0]->getFunction()->isDefinitionExact()) {
+ Function *F = SCC[0]->getFunction();
+
+ if (!F || !F->isDefinitionExact()) {
// Calls externally or not exact - can't say anything useful. Remove any
// existing function records (may have been created when scanning
// globals).
@@ -484,19 +486,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
continue;
}
- FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+ FunctionInfo &FI = FunctionInfos[F];
bool KnowNothing = false;
// Collect the mod/ref properties due to called functions. We only compute
// one mod-ref set.
for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
- Function *F = SCC[i]->getFunction();
if (!F) {
KnowNothing = true;
break;
}
- if (F->isDeclaration()) {
+ if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) {
// Try to get mod/ref behaviour from function attributes.
if (F->doesNotAccessMemory()) {
// Can't do better than that!
@@ -545,6 +546,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
for (auto *Node : SCC) {
if (FI.getModRefInfo() == MRI_ModRef)
break; // The mod/ref lattice saturates here.
+
+ // Don't prove any properties based on the implementation of an optnone
+ // function. Function attributes were already used as a best approximation
+ // above.
+ if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+
for (Instruction &I : instructions(Node->getFunction())) {
if (FI.getModRefInfo() == MRI_ModRef)
break; // The mod/ref lattice saturates here.
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index a661b01..c30feb9 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -76,9 +76,8 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
// An add is interesting if exactly one of its operands is interesting.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
bool AnyInterestingYet = false;
- for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
- OI != OE; ++OI)
- if (isInteresting(*OI, I, L, SE, LI)) {
+ for (const auto *Op : Add->operands())
+ if (isInteresting(Op, I, L, SE, LI)) {
if (AnyInterestingYet)
return false;
AnyInterestingYet = true;
@@ -118,6 +117,50 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
return true;
}
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User))
+ return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand)
+ return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
/// AddUsersImpl - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
@@ -208,10 +251,16 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// The regular return value here is discarded; instead of recording
// it, we just recompute it when we need it.
const SCEV *OriginalISE = ISE;
- ISE = TransformForPostIncUse(NormalizeAutodetect,
- ISE, User, I,
- NewUse.PostIncLoops,
- *SE, *DT);
+
+ auto NormalizePred = [&](const SCEVAddRecExpr *AR) {
+ auto *L = AR->getLoop();
+ bool Result = IVUseShouldUsePostIncValue(User, I, L, DT);
+ if (Result)
+ NewUse.PostIncLoops.insert(L);
+ return Result;
+ };
+
+ ISE = normalizeForPostIncUseIf(ISE, NormalizePred, *SE);
// PostIncNormalization effectively simplifies the expression under
// pre-increment assumptions. Those assumptions (no wrapping) might not
@@ -219,8 +268,7 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// transformation is invertible.
if (OriginalISE != ISE) {
const SCEV *DenormalizedISE =
- TransformForPostIncUse(Denormalize, ISE, User, I,
- NewUse.PostIncLoops, *SE, *DT);
+ denormalizeForPostIncUse(ISE, NewUse.PostIncLoops, *SE);
// If we normalized the expression, but denormalization doesn't give the
// original one, discard this user.
@@ -338,11 +386,8 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
/// getExpr - Return the expression for the use.
const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
- return
- TransformForPostIncUse(Normalize, getReplacementExpr(IU),
- IU.getUser(), IU.getOperandValToReplace(),
- const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
- *SE, *DT);
+ return normalizeForPostIncUse(getReplacementExpr(IU), IU.getPostIncLoops(),
+ *SE);
}
static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
@@ -353,9 +398,8 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ for (const auto *Op : Add->operands())
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(Op, L))
return AR;
return nullptr;
}
diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 3da33ac..ed233d2 100644
--- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -43,7 +43,7 @@ static cl::opt<unsigned>
// The percent threshold for the direct-call target (this call site vs the
// total call count) for it to be considered as the promotion target.
static cl::opt<unsigned>
- ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
+ ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden,
cl::ZeroOrMore,
cl::desc("The percentage threshold for the promotion"));
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index 4109049..3569366 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -48,11 +49,16 @@ static cl::opt<int> HintThreshold(
"inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
+static cl::opt<int>
+ ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
+ cl::init(45),
+ cl::desc("Threshold for inlining cold callsites"));
+
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
- "inlinecold-threshold", cl::Hidden, cl::init(225),
+ "inlinecold-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining functions with cold attribute"));
static cl::opt<int>
@@ -60,6 +66,12 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
+static cl::opt<int> ColdCallSiteRelFreq(
+ "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
+ "entry frequency, for a callsite to be cold in the absence of "
+ "profile information."));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -72,12 +84,18 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Getter for the cache of @llvm.assume intrinsics.
std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
+ /// Getter for BlockFrequencyInfo
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
+
/// Profile summary information.
ProfileSummaryInfo *PSI;
/// The called function.
Function &F;
+ // Cache the DataLayout since we use it a lot.
+ const DataLayout &DL;
+
/// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr.
@@ -133,9 +151,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
void disableSROA(Value *V);
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);
- bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool isGEPFree(GetElementPtrInst &GEP);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS);
+ template <typename Callable>
+ bool simplifyInstruction(Instruction &I, Callable Evaluate);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
/// Return true if the given argument to the function being considered for
@@ -158,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Return true if size growth is allowed when inlining the callee at CS.
bool allowSizeGrowth(CallSite CS);
+ /// Return true if \p CS is a cold callsite.
+ bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@@ -202,9 +225,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const TargetTransformInfo &TTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
const InlineParams &Params)
- : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
+ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
+ PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
@@ -286,23 +311,11 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
SROACostSavings += InstructionCost;
}
-/// \brief Check whether a GEP's indices are all constant.
-///
-/// Respects any simplified values known during the analysis of this callsite.
-bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
- for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
- if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
- return false;
-
- return true;
-}
-
/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- const DataLayout &DL = F.getParent()->getDataLayout();
unsigned IntPtrWidth = DL.getPointerSizeInBits();
assert(IntPtrWidth == Offset.getBitWidth());
@@ -331,13 +344,27 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
return true;
}
+/// \brief Use TTI to check whether a GEP is free.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
+ SmallVector<Value *, 4> Indices;
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
+ Indices.push_back(SimpleOp);
+ else
+ Indices.push_back(*I);
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
+ Indices);
+}
+
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Check whether inlining will turn a dynamic alloca into a static
// alloca and handle that case.
if (I.isArrayAllocation()) {
Constant *Size = SimplifiedValues.lookup(I.getArraySize());
if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
- const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(
AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
@@ -347,7 +374,6 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
- const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
}
@@ -396,7 +422,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Non-constant GEPs aren't folded, and disable SROA.
if (SROACandidate)
disableSROA(CostIt);
- return false;
+ return isGEPFree(I);
}
// Add the result as a new mapping to Base + Offset.
@@ -411,7 +437,15 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
}
}
- if (isGEPOffsetConstant(I)) {
+ // Lambda to check whether a GEP's indices are all constant.
+ auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+ return true;
+ };
+
+ if (IsGEPOffsetConstant(I)) {
if (SROACandidate)
SROAArgValues[&I] = SROAArg;
@@ -422,19 +456,36 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Variable GEPs will require math and will disable SROA.
if (SROACandidate)
disableSROA(CostIt);
- return false;
+ return isGEPFree(I);
+}
+
+/// Simplify \p I if its operands are constants and update SimplifiedValues.
+/// \p Evaluate is a callable specific to instruction type that evaluates the
+/// instruction when all the operands are constants.
+template <typename Callable>
+bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
+ SmallVector<Constant *, 2> COps;
+ for (Value *Op : I.operands()) {
+ Constant *COp = dyn_cast<Constant>(Op);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Op);
+ if (!COp)
+ return false;
+ COps.push_back(COp);
+ }
+ auto *C = Evaluate(COps);
+ if (!C)
+ return false;
+ SimplifiedValues[&I] = C;
+ return true;
}
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getBitCast(COps[0], I.getType());
+ }))
+ return true;
// Track base/offsets through casts
std::pair<Value *, APInt> BaseAndOffset =
@@ -455,19 +506,14 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Propagate constants through ptrtoint.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getPtrToInt(COps[0], I.getType());
+ }))
+ return true;
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- const DataLayout &DL = F.getParent()->getDataLayout();
if (IntegerSize >= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset =
ConstantOffsetPtrs.lookup(I.getOperand(0));
@@ -492,20 +538,15 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// Propagate constants through ptrtoint.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getIntToPtr(COps[0], I.getType());
+ }))
+ return true;
// Track base/offset pairs when round-tripped through a pointer without
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- const DataLayout &DL = F.getParent()->getDataLayout();
if (IntegerSize <= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
@@ -523,14 +564,10 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
bool CallAnalyzer::visitCastInst(CastInst &I) {
// Propagate constants through ptrtoint.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
+ }))
+ return true;
// Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
disableSROA(I.getOperand(0));
@@ -540,16 +577,10 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
Value *Operand = I.getOperand(0);
- Constant *COp = dyn_cast<Constant>(Operand);
- if (!COp)
- COp = SimplifiedValues.lookup(Operand);
- if (COp) {
- const DataLayout &DL = F.getParent()->getDataLayout();
- if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantFoldInstOperands(&I, COps[0], DL);
+ }))
+ return true;
// Disable any SROA on the argument to arbitrary unary operators.
disableSROA(Operand);
@@ -558,8 +589,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
}
bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
- unsigned ArgNo = A->getArgNo();
- return CandidateCS.paramHasAttr(ArgNo + 1, Attr);
+ return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
@@ -610,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
return true;
}
+bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+ // If global profile summary is available, then callsite's coldness is
+ // determined based on that.
+ if (PSI->hasProfileSummary())
+ return PSI->isColdCallSite(CS, CallerBFI);
+ if (!CallerBFI)
+ return false;
+
+ // In the absence of global profile summary, determine if the callsite is cold
+ // relative to caller's entry. We could potentially cache the computation of
+ // scaled entry frequency, but the added complexity is not worth it unless
+ // this scaling shows up high in the profiles.
+ const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
+ auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+ auto CallerEntryFreq =
+ CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ return CallSiteFreq < CallerEntryFreq * ColdProb;
+}
+
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
@@ -642,17 +692,34 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (Callee.hasFnAttribute(Attribute::InlineHint))
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
if (PSI) {
- uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
- PSI->isHotCount(TotalWeight)) {
- Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
- } else if (PSI->isFunctionEntryHot(&Callee)) {
- // If callsite hotness can not be determined, we may still know
- // that the callee is hot and treat it as a weaker hint for threshold
- // increase.
- Threshold = MaxIfValid(Threshold, Params.HintThreshold);
- } else if (PSI->isFunctionEntryCold(&Callee)) {
- Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+ BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
+ // FIXME: After switching to the new passmanager, simplify the logic below
+ // by checking only the callsite hotness/coldness. The check for CallerBFI
+ // exists only because we do not have BFI available with the old PM.
+ //
+ // Use callee's hotness information only if we have no way of determining
+ // callsite's hotness information. Callsite hotness can be determined if
+ // sample profile is used (which adds hotness metadata to calls) or if
+ // caller's BlockFrequencyInfo is available.
+ if (CallerBFI || PSI->hasSampleProfile()) {
+ if (PSI->isHotCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Hot callsite.\n");
+ Threshold = Params.HotCallSiteThreshold.getValue();
+ } else if (isColdCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Cold callsite.\n");
+ Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
+ }
+ } else {
+ if (PSI->isFunctionEntryHot(&Callee)) {
+ DEBUG(dbgs() << "Hot callee.\n");
+ // If callsite hotness can not be determined, we may still know
+ // that the callee is hot and treat it as a weaker hint for threshold
+ // increase.
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+ } else if (PSI->isFunctionEntryCold(&Callee)) {
+ DEBUG(dbgs() << "Cold callee.\n");
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
+ }
}
}
}
@@ -665,20 +732,10 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// First try to handle simplified comparisons.
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
- if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- if (Constant *C =
- ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
+ }))
+ return true;
if (I.getOpcode() == Instruction::FCmp)
return false;
@@ -756,24 +813,18 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- const DataLayout &DL = F.getParent()->getDataLayout();
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
- Value *SimpleV = nullptr;
- if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
- else
- SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+ auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
+ FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
+ return dyn_cast_or_null<Constant>(SimpleV);
+ };
- if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
- SimplifiedValues[&I] = C;
+ if (simplifyInstruction(I, Evaluate))
return true;
- }
// Disable any SROA on arguments to arbitrary, unsimplified binary operators.
disableSROA(LHS);
@@ -814,13 +865,10 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
// Constant folding for extract value is trivial.
- Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
- if (!C)
- C = SimplifiedValues.lookup(I.getAggregateOperand());
- if (C) {
- SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getExtractValue(COps[0], I.getIndices());
+ }))
return true;
- }
// SROA can look through these but give them a cost.
return false;
@@ -828,17 +876,12 @@ bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
// Constant folding for insert value is trivial.
- Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
- if (!AggC)
- AggC = SimplifiedValues.lookup(I.getAggregateOperand());
- Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
- if (!InsertedC)
- InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
- if (AggC && InsertedC) {
- SimplifiedValues[&I] =
- ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices());
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
+ /*InsertedValueOperand*/ COps[1],
+ I.getIndices());
+ }))
return true;
- }
// SROA can look through these but give them a cost.
return false;
@@ -855,7 +898,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
// because we have to continually rebuild the argument list even when no
// simplifications can be performed. Until that is fixed with remapping
// inside of instsimplify, directly constant fold calls here.
- if (!canConstantFoldCallTo(F))
+ if (!canConstantFoldCallTo(CS, F))
return false;
// Try to re-map the arguments to constants.
@@ -871,7 +914,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
ConstantArgs.push_back(C);
}
- if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
+ if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
SimplifiedValues[CS.getInstruction()] = C;
return true;
}
@@ -959,7 +1002,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
- CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
+ CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
+ IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -995,22 +1039,74 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
if (isa<ConstantInt>(V))
return true;
- // Otherwise, we need to accumulate a cost proportional to the number of
- // distinct successor blocks. This fan-out in the CFG cannot be represented
- // for free even if we can represent the core switch as a jumptable that
- // takes a single instruction.
+ // Assume the most general case where the switch is lowered into
+ // either a jump table, bit test, or a balanced binary tree consisting of
+ // case clusters without merging adjacent clusters with the same
+ // destination. We do not consider the switches that are lowered with a mix
+ // of jump table/bit test/binary search tree. The cost of the switch is
+ // proportional to the size of the tree or the size of jump table range.
//
// NB: We convert large switches which are just used to initialize large phi
// nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
// inlining those. It will prevent inlining in cases where the optimization
// does not (yet) fire.
- SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
- SuccessorBlocks.insert(SI.getDefaultDest());
- for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
- SuccessorBlocks.insert(I.getCaseSuccessor());
- // Add cost corresponding to the number of distinct destinations. The first
- // we model as free because of fallthrough.
- Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+
+ // Maximum valid cost increased in this function.
+ int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
+
+ // Exit early for a large switch, assuming one case needs at least one
+ // instruction.
+ // FIXME: This is not true for a bit test, but ignore such case for now to
+ // save compile-time.
+ int64_t CostLowerBound =
+ std::min((int64_t)CostUpperBound,
+ (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
+
+ if (CostLowerBound > Threshold) {
+ Cost = CostLowerBound;
+ return false;
+ }
+
+ unsigned JumpTableSize = 0;
+ unsigned NumCaseCluster =
+ TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+
+ // If suitable for a jump table, consider the cost for the table size and
+ // branch to destination.
+ if (JumpTableSize) {
+ int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
+ 4 * InlineConstants::InstrCost;
+
+ Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
+ return false;
+ }
+
+ // Considering forming a binary search, we should find the number of nodes
+ // which is same as the number of comparisons when lowered. For a given
+ // number of clusters, n, we can define a recursive function, f(n), to find
+ // the number of nodes in the tree. The recursion is :
+ // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
+ // and f(n) = n, when n <= 3.
+ // This will lead a binary tree where the leaf should be either f(2) or f(3)
+ // when n > 3. So, the number of comparisons from leaves should be n, while
+ // the number of non-leaf should be :
+ // 2^(log2(n) - 1) - 1
+ // = 2^log2(n) * 2^-1 - 1
+ // = n / 2 - 1.
+ // Considering comparisons from leaf and non-leaf nodes, we can estimate the
+ // number of comparisons in a simple closed form :
+ // n + n / 2 - 1 = n * 3 / 2 - 1
+ if (NumCaseCluster <= 3) {
+ // Suppose a comparison includes one compare and one conditional branch.
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+ return false;
+ }
+
+ int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
+ int64_t SwitchCost =
+ ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+
+ Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
return false;
}
@@ -1098,19 +1194,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// is expensive or the function has the "use-soft-float" attribute, this may
// eventually become a library call. Treat the cost as such.
if (I->getType()->isFloatingPointTy()) {
- bool hasSoftFloatAttr = false;
-
// If the function has the "use-soft-float" attribute, mark it as
// expensive.
- if (F.hasFnAttribute("use-soft-float")) {
- Attribute Attr = F.getFnAttribute("use-soft-float");
- StringRef Val = Attr.getValueAsString();
- if (Val == "true")
- hasSoftFloatAttr = true;
- }
-
if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
- hasSoftFloatAttr)
+ (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
Cost += InlineConstants::CallPenalty;
}
@@ -1155,7 +1242,6 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
if (!V->getType()->isPointerTy())
return nullptr;
- const DataLayout &DL = F.getParent()->getDataLayout();
unsigned IntPtrWidth = DL.getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
@@ -1212,7 +1298,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
FiftyPercentVectorBonus = 3 * Threshold / 2;
TenPercentVectorBonus = 3 * Threshold / 4;
- const DataLayout &DL = F.getParent()->getDataLayout();
// Track whether the post-inlining function would have more than one basic
// block. A single basic block is often intended for inlining. Balloon the
@@ -1225,36 +1310,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the rest of the function body.
Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
- // Give out bonuses per argument, as the instructions setting them up will
- // be gone after inlining.
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (CS.isByValArgument(I)) {
- // We approximate the number of loads and stores needed by dividing the
- // size of the byval type by the target's pointer size.
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = DL.getPointerSizeInBits();
- // Ceiling division.
- unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+ // Give out bonuses for the callsite, as the instructions setting them up
+ // will be gone after inlining.
+ Cost -= getCallsiteCost(CS, DL);
- // If it generates more than 8 stores it is likely to be expanded as an
- // inline memcpy so we take that as an upper bound. Otherwise we assume
- // one load and one store per word copied.
- // FIXME: The maxStoresPerMemcpy setting from the target should be used
- // here instead of a magic number of 8, but it's not available via
- // DataLayout.
- NumStores = std::min(NumStores, 8U);
-
- Cost -= 2 * NumStores * InlineConstants::InstrCost;
- } else {
- // For non-byval arguments subtract off one instruction per call
- // argument.
- Cost -= InlineConstants::InstrCost;
- }
- }
- // The call instruction also disappears after inlining.
- Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty;
-
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
bool OnlyOneCallAndLocalLinkage =
@@ -1371,7 +1430,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Value *Cond = SI->getCondition();
if (ConstantInt *SimpleCond =
dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
- BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
continue;
}
}
@@ -1430,13 +1489,6 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() {
}
#endif
-/// \brief Test that two functions either have or have not the given attribute
-/// at the same time.
-template <typename AttrKind>
-static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
- return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
-}
-
/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
@@ -1446,18 +1498,52 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
+int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
+ int Cost = 0;
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+ if (CS.isByValArgument(I)) {
+ // We approximate the number of loads and stores needed by dividing the
+ // size of the byval type by the target's pointer size.
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = DL.getPointerSizeInBits();
+ // Ceiling division.
+ unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+ // If it generates more than 8 stores it is likely to be expanded as an
+ // inline memcpy so we take that as an upper bound. Otherwise we assume
+ // one load and one store per word copied.
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used
+ // here instead of a magic number of 8, but it's not available via
+ // DataLayout.
+ NumStores = std::min(NumStores, 8U);
+
+ Cost += 2 * NumStores * InlineConstants::InstrCost;
+ } else {
+ // For non-byval arguments subtract off one instruction per call
+ // argument.
+ Cost += InlineConstants::InstrCost;
+ }
+ }
+ // The call instruction also disappears after inlining.
+ Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
+ return Cost;
+}
+
InlineCost llvm::getInlineCost(
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) {
return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
- GetAssumptionCache, PSI);
+ GetAssumptionCache, GetBFI, PSI);
}
InlineCost llvm::getInlineCost(
CallSite CS, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) {
// Cannot inline indirect calls.
@@ -1492,7 +1578,8 @@ InlineCost llvm::getInlineCost(
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
+ Params);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1565,7 +1652,9 @@ InlineParams llvm::getInlineParams(int Threshold) {
// Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
Params.HotCallSiteThreshold = HotCallSiteThreshold;
- // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
+ // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
+ Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
+
// Set the OptMinSizeThreshold and OptSizeThreshold params only if the
// -inlinehint-threshold commandline option is not explicitly given. If that
// option is present, then its value applies even for callees with size and
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
index de2b9c0..95ab6ee 100644
--- a/contrib/llvm/lib/Analysis/InstCount.cpp
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Pass.h"
@@ -26,14 +26,12 @@ using namespace llvm;
STATISTIC(TotalInsts , "Number of instructions (of all types)");
STATISTIC(TotalBlocks, "Number of basic blocks");
STATISTIC(TotalFuncs , "Number of non-external functions");
-STATISTIC(TotalMemInst, "Number of memory instructions");
#define HANDLE_INST(N, OPCODE, CLASS) \
STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
#include "llvm/IR/Instruction.def"
-
namespace {
class InstCount : public FunctionPass, public InstVisitor<InstCount> {
friend class InstVisitor<InstCount>;
@@ -76,13 +74,6 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
// function.
//
bool InstCount::runOnFunction(Function &F) {
- unsigned StartMemInsts =
- NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
- NumInvokeInst + NumAllocaInst;
visit(F);
- unsigned EndMemInsts =
- NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
- NumInvokeInst + NumAllocaInst;
- TotalMemInst += EndMemInsts-StartMemInsts;
return false;
}
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 796e6e4..b4f3b87 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -21,9 +21,12 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
@@ -34,6 +37,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/KnownBits.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -45,49 +49,30 @@ enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
-namespace {
-struct Query {
- const DataLayout &DL;
- const TargetLibraryInfo *TLI;
- const DominatorTree *DT;
- AssumptionCache *AC;
- const Instruction *CxtI;
-
- Query(const DataLayout &DL, const TargetLibraryInfo *tli,
- const DominatorTree *dt, AssumptionCache *ac = nullptr,
- const Instruction *cxti = nullptr)
- : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
-};
-} // end anonymous namespace
-
-static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
+static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
- const Query &, unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
+ const SimplifyQuery &, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse);
-static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+ const SimplifyQuery &Q, unsigned MaxRecurse);
+static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
- const Query &, unsigned);
+ const SimplifyQuery &, unsigned);
-/// For a boolean type, or a vector of boolean type, return false, or
-/// a vector with every element false, as appropriate for the type.
+/// For a boolean type or a vector of boolean type, return false or a vector
+/// with every element false.
static Constant *getFalse(Type *Ty) {
- assert(Ty->getScalarType()->isIntegerTy(1) &&
- "Expected i1 type or a vector of i1!");
- return Constant::getNullValue(Ty);
+ return ConstantInt::getFalse(Ty);
}
-/// For a boolean type, or a vector of boolean type, return true, or
-/// a vector with every element true, as appropriate for the type.
+/// For a boolean type or a vector of boolean type, return true or a vector
+/// with every element true.
static Constant *getTrue(Type *Ty) {
- assert(Ty->getScalarType()->isIntegerTy(1) &&
- "Expected i1 type or a vector of i1!");
- return Constant::getAllOnesValue(Ty);
+ return ConstantInt::getTrue(Ty);
}
/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
@@ -118,13 +103,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
return false;
// If we have a DominatorTree then do a precise test.
- if (DT) {
- if (!DT->isReachableFromEntry(P->getParent()))
- return true;
- if (!DT->isReachableFromEntry(I->getParent()))
- return false;
+ if (DT)
return DT->dominates(I, P);
- }
// Otherwise, if the instruction is in the entry block and is not an invoke,
// then it obviously dominates all phi nodes.
@@ -140,10 +120,9 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
-static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExpand, const Query &Q,
- unsigned MaxRecurse) {
- Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+ Instruction::BinaryOps OpcodeToExpand,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -199,9 +178,10 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Generic simplifications for associative binary operations.
/// Returns the simpler value, or null if none was found.
-static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
- Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -298,8 +278,9 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// try to simplify the binop by seeing whether evaluating it on both branches
/// of the select results in the same value. Returns the common value if so,
/// otherwise returns null.
-static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
+ Value *RHS, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -370,7 +351,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
/// comparison by seeing whether both branches of the select result in the same
/// value. Returns the common value if so, otherwise returns null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -451,8 +432,9 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// try to simplify the binop by seeing whether evaluating it on the incoming
/// phi values yields the same result for every value. If so returns the common
/// value, otherwise returns null.
-static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
+ Value *RHS, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -494,7 +476,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
/// yields the same result every time. If so returns the common result,
/// otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -527,17 +509,26 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return CommonValue;
}
+static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
+ Value *&Op0, Value *&Op1,
+ const SimplifyQuery &Q) {
+ if (auto *CLHS = dyn_cast<Constant>(Op0)) {
+ if (auto *CRHS = dyn_cast<Constant>(Op1))
+ return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
+
+ // Canonicalize the constant to the RHS if this is a commutative operation.
+ if (Instruction::isCommutative(Opcode))
+ std::swap(Op0, Op1);
+ }
+ return nullptr;
+}
+
/// Given operands for an Add, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Add, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
+ return C;
// X + undef -> undef
if (match(Op1, m_Undef()))
@@ -556,12 +547,20 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return Y;
// X + ~X -> -1 since ~X = -X-1
+ Type *Ty = Op0->getType();
if (match(Op0, m_Not(m_Specific(Op1))) ||
match(Op1, m_Not(m_Specific(Op0))))
- return Constant::getAllOnesValue(Op0->getType());
+ return Constant::getAllOnesValue(Ty);
+
+ // add nsw/nuw (xor Y, signmask), signmask --> Y
+ // The no-wrapping add guarantees that the top bit will be set by the add.
+ // Therefore, the xor must be clearing the already set sign bit of Y.
+ if ((isNSW || isNUW) && match(Op1, m_SignMask()) &&
+ match(Op0, m_Xor(m_Value(Y), m_SignMask())))
+ return Y;
/// i1 add -> xor.
- if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -583,11 +582,8 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Query) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit);
}
/// \brief Compute the base pointer and cumulative constant offsets for V.
@@ -602,7 +598,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
- assert(V->getType()->getScalarType()->isPointerTy());
+ assert(V->getType()->isPtrOrPtrVectorTy());
Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
@@ -631,8 +627,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
}
break;
}
- assert(V->getType()->getScalarType()->isPointerTy() &&
- "Unexpected operand type!");
+ assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
@@ -664,10 +659,9 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
/// Given operands for a Sub, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0))
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Sub, CLHS, CRHS, Q.DL);
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q))
+ return C;
// X - undef -> undef
// undef - X -> undef
@@ -688,11 +682,8 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (isNUW)
return Op0;
- unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (KnownZero == ~APInt::getSignBit(BitWidth)) {
+ KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (Known.Zero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
if (isNSW)
@@ -779,7 +770,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
// i1 sub -> xor.
- if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -796,24 +787,16 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::FAdd, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
+ return C;
// fadd X, -0 ==> X
if (match(Op1, m_NegZero()))
@@ -845,11 +828,9 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// Given operands for an FSub, see if we can fold the result. If not, this
/// returns null.
static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::FSub, CLHS, CRHS, Q.DL);
- }
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
+ return C;
// fsub X, 0 ==> X
if (match(Op1, m_Zero()))
@@ -878,40 +859,28 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
/// Given the operands for an FMul, see if we can fold the result
-static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
- FastMathFlags FMF,
- const Query &Q,
- unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::FMul, CLHS, CRHS, Q.DL);
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
+ return C;
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
-
- // fmul X, 1.0 ==> X
- if (match(Op1, m_FPOne()))
- return Op0;
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
- // fmul nnan nsz X, 0 ==> 0
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
- return Op1;
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+ return Op1;
- return nullptr;
+ return nullptr;
}
/// Given operands for a Mul, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Mul, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q))
+ return C;
// X * undef -> 0
if (match(Op1, m_Undef()))
@@ -932,7 +901,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
return X;
// i1 mul -> and.
- if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -964,77 +933,87 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
}
+
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit);
}
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
}
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
}
-/// Given operands for an SDiv or UDiv, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *C0 = dyn_cast<Constant>(Op0))
- if (Constant *C1 = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL);
-
- bool isSigned = Opcode == Instruction::SDiv;
+/// Check for common or similar folds of integer division or integer remainder.
+static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
+ Type *Ty = Op0->getType();
// X / undef -> undef
+ // X % undef -> undef
if (match(Op1, m_Undef()))
return Op1;
- // X / 0 -> undef, we don't need to preserve faults!
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // We don't need to preserve faults!
if (match(Op1, m_Zero()))
- return UndefValue::get(Op1->getType());
+ return UndefValue::get(Ty);
+
+ // If any element of a constant divisor vector is zero, the whole op is undef.
+ auto *Op1C = dyn_cast<Constant>(Op1);
+ if (Op1C && Ty->isVectorTy()) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = Op1C->getAggregateElement(i);
+ if (Elt && Elt->isNullValue())
+ return UndefValue::get(Ty);
+ }
+ }
// undef / X -> 0
+ // undef % X -> 0
if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
+ return Constant::getNullValue(Ty);
- // 0 / X -> 0, we don't need to preserve faults!
+ // 0 / X -> 0
+ // 0 % X -> 0
if (match(Op0, m_Zero()))
return Op0;
+ // X / X -> 1
+ // X % X -> 0
+ if (Op0 == Op1)
+ return IsDiv ? ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty);
+
// X / 1 -> X
- if (match(Op1, m_One()))
- return Op0;
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1))
+ return IsDiv ? Op0 : Constant::getNullValue(Ty);
- if (Op0->getType()->isIntegerTy(1))
- // It can't be division by zero, hence it must be division by one.
- return Op0;
+ return nullptr;
+}
- // X / X -> 1
- if (Op0 == Op1)
- return ConstantInt::get(Op0->getType(), 1);
+/// Given operands for an SDiv or UDiv, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
+ return C;
+
+ if (Value *V = simplifyDivRem(Op0, Op1, true))
+ return V;
+
+ bool isSigned = Opcode == Instruction::SDiv;
// (X * Y) / Y -> X if the multiplication does not overflow.
Value *X = nullptr, *Y = nullptr;
@@ -1061,7 +1040,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
match(Op1, m_ConstantInt(C2))) {
bool Overflow;
- C1->getValue().umul_ov(C2->getValue(), Overflow);
+ (void)C1->getValue().umul_ov(C2->getValue(), Overflow);
if (Overflow)
return Constant::getNullValue(Op0->getType());
}
@@ -1083,7 +1062,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// Given operands for an SDiv, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1091,17 +1070,13 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a UDiv, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1119,16 +1094,15 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
}
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned) {
+ const SimplifyQuery &Q, unsigned) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
+ return C;
+
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1166,49 +1140,19 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit);
}
/// Given operands for an SRem or URem, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *C0 = dyn_cast<Constant>(Op0))
- if (Constant *C1 = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL);
-
- // X % undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
-
- // undef % X -> 0
- if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
-
- // 0 % X -> 0, we don't need to preserve faults!
- if (match(Op0, m_Zero()))
- return Op0;
-
- // X % 0 -> undef, we don't need to preserve faults!
- if (match(Op1, m_Zero()))
- return UndefValue::get(Op0->getType());
-
- // X % 1 -> 0
- if (match(Op1, m_One()))
- return Constant::getNullValue(Op0->getType());
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
+ return C;
- if (Op0->getType()->isIntegerTy(1))
- // It can't be remainder by zero, hence it must be remainder by one.
- return Constant::getNullValue(Op0->getType());
-
- // X % X -> 0
- if (Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
+ if (Value *V = simplifyDivRem(Op0, Op1, false))
+ return V;
// (X % Y) % Y -> X % Y
if ((Opcode == Instruction::SRem &&
@@ -1234,7 +1178,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// Given operands for an SRem, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1242,17 +1186,13 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a URem, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1270,16 +1210,15 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &, unsigned) {
+ const SimplifyQuery &Q, unsigned) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
+ return C;
+
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1298,12 +1237,8 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit);
}
/// Returns true if a shift by \c Amount always yields undef.
@@ -1335,11 +1270,10 @@ static bool isUndefShift(Value *Amount) {
/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *C0 = dyn_cast<Constant>(Op0))
- if (Constant *C1 = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL);
+static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
+ Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
+ return C;
// 0 shift by X -> 0
if (match(Op0, m_Zero()))
@@ -1367,18 +1301,14 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
// If any bits in the shift amount make that value greater than or equal to
// the number of bits in the type, the shift is undefined.
- unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (KnownOne.getLimitedValue() >= BitWidth)
+ KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (Known.One.getLimitedValue() >= Known.getBitWidth())
return UndefValue::get(Op0->getType());
// If all valid bits in the shift amount are known zero, the first operand is
// unchanged.
- unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth);
- APInt ShiftAmountMask = APInt::getLowBitsSet(BitWidth, NumValidShiftBits);
- if ((KnownZero & ShiftAmountMask) == ShiftAmountMask)
+ unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth());
+ if (Known.countMinTrailingZeros() >= NumValidShiftBits)
return Op0;
return nullptr;
@@ -1386,8 +1316,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// \brief Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
- bool isExact, const Query &Q,
+static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
+ Value *Op1, bool isExact, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1403,12 +1333,8 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
// The low bit cannot be shifted out of an exact shift if it is set.
if (isExact) {
- unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
- APInt Op0KnownZero(BitWidth, 0);
- APInt Op0KnownOne(BitWidth, 0);
- computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC,
- Q.CxtI, Q.DT);
- if (Op0KnownOne[0])
+ KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ if (Op0Known.One[0])
return Op0;
}
@@ -1418,7 +1344,7 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
/// Given operands for an Shl, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1435,17 +1361,14 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
/// Given operands for an LShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
@@ -1459,18 +1382,14 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
/// Given operands for an AShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
@@ -1493,14 +1412,12 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
ICmpInst *UnsignedICmp, bool IsAnd) {
Value *X, *Y;
@@ -1569,29 +1486,75 @@ static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
-static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
- return X;
+static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ Value *A ,*B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
- if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
- return X;
+ // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op0 from this 'or'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op1;
+
+ // Check for any combination of predicates that cover the entire range of
+ // possibilities.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
+ (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
+ return getTrue(Op0->getType());
+
+ return nullptr;
+}
+
+/// Test if a pair of compares with a shared operand and 2 constants has an
+/// empty set intersection, full set union, or if one compare is a superset of
+/// the other.
+static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool IsAnd) {
+ // Look for this pattern: {and/or} (icmp X, C0), (icmp X, C1)).
+ if (Cmp0->getOperand(0) != Cmp1->getOperand(0))
+ return nullptr;
- // Look for this pattern: (icmp V, C0) & (icmp V, C1)).
- Type *ITy = Op0->getType();
- ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
- Value *V;
- if (match(Op0, m_ICmp(Pred0, m_Value(V), m_APInt(C0))) &&
- match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) {
- // Make a constant range that's the intersection of the two icmp ranges.
- // If the intersection is empty, we know that the result is false.
- auto Range0 = ConstantRange::makeAllowedICmpRegion(Pred0, *C0);
- auto Range1 = ConstantRange::makeAllowedICmpRegion(Pred1, *C1);
- if (Range0.intersectWith(Range1).isEmptySet())
- return getFalse(ITy);
- }
+ if (!match(Cmp0->getOperand(1), m_APInt(C0)) ||
+ !match(Cmp1->getOperand(1), m_APInt(C1)))
+ return nullptr;
+
+ auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0);
+ auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1);
+
+ // For and-of-compares, check if the intersection is empty:
+ // (icmp X, C0) && (icmp X, C1) --> empty set --> false
+ if (IsAnd && Range0.intersectWith(Range1).isEmptySet())
+ return getFalse(Cmp0->getType());
+
+ // For or-of-compares, check if the union is full:
+ // (icmp X, C0) || (icmp X, C1) --> full set --> true
+ if (!IsAnd && Range0.unionWith(Range1).isFullSet())
+ return getTrue(Cmp0->getType());
+
+ // Is one range a superset of the other?
+ // If this is and-of-compares, take the smaller set:
+ // (icmp sgt X, 4) && (icmp sgt X, 42) --> icmp sgt X, 42
+ // If this is or-of-compares, take the larger set:
+ // (icmp sgt X, 4) || (icmp sgt X, 42) --> icmp sgt X, 4
+ if (Range0.contains(Range1))
+ return IsAnd ? Cmp1 : Cmp0;
+ if (Range1.contains(Range0))
+ return IsAnd ? Cmp0 : Cmp1;
+ return nullptr;
+}
+
+static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
// (icmp (add V, C0), C1) & (icmp V, C0)
+ ICmpInst::Predicate Pred0, Pred1;
+ const APInt *C0, *C1;
+ Value *V;
if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
@@ -1602,6 +1565,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
+ Type *ITy = Op0->getType();
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
@@ -1632,17 +1596,132 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
+static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ return X;
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
+ return X;
+
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0))
+ return X;
+
+ if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
+ return X;
+
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
+ return X;
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
+ return X;
+
+ return nullptr;
+}
+
+static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+ // (icmp (add V, C0), C1) | (icmp V, C0)
+ ICmpInst::Predicate Pred0, Pred1;
+ const APInt *C0, *C1;
+ Value *V;
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
+ return nullptr;
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ }
+ if (C0->getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ return nullptr;
+}
+
+static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ return X;
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
+ return X;
+
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0))
+ return X;
+
+ if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
+ return X;
+
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
+ return X;
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
+ return X;
+
+ return nullptr;
+}
+
+static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) {
+ // Look through casts of the 'and' operands to find compares.
+ auto *Cast0 = dyn_cast<CastInst>(Op0);
+ auto *Cast1 = dyn_cast<CastInst>(Op1);
+ if (Cast0 && Cast1 && Cast0->getOpcode() == Cast1->getOpcode() &&
+ Cast0->getSrcTy() == Cast1->getSrcTy()) {
+ Op0 = Cast0->getOperand(0);
+ Op1 = Cast1->getOperand(0);
+ }
+
+ auto *Cmp0 = dyn_cast<ICmpInst>(Op0);
+ auto *Cmp1 = dyn_cast<ICmpInst>(Op1);
+ if (!Cmp0 || !Cmp1)
+ return nullptr;
+
+ Value *V =
+ IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1);
+ if (!V)
+ return nullptr;
+ if (!Cast0)
+ return V;
+
+ // If we looked through casts, we can only handle a constant simplification
+ // because we are not allowed to create a cast instruction here.
+ if (auto *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType());
+
+ return nullptr;
+}
+
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::And, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
+ return C;
// X & undef -> 0
if (match(Op1, m_Undef()))
@@ -1666,16 +1745,31 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return Constant::getNullValue(Op0->getType());
// (A | ?) & A = A
- Value *A = nullptr, *B = nullptr;
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- (A == Op1 || B == Op1))
+ if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
return Op1;
// A & (A | ?) = A
- if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
- (A == Op0 || B == Op0))
+ if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
+ // A mask that only clears known zeros of a shifted value is a no-op.
+ Value *X;
+ const APInt *Mask;
+ const APInt *ShAmt;
+ if (match(Op1, m_APInt(Mask))) {
+ // If all bits in the inverted and shifted mask are clear:
+ // and (shl X, ShAmt), Mask --> shl X, ShAmt
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
+ (~(*Mask)).lshr(*ShAmt).isNullValue())
+ return Op0;
+
+ // If all bits in the inverted and shifted mask are clear:
+ // and (lshr X, ShAmt), Mask --> lshr X, ShAmt
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
+ (~(*Mask)).shl(*ShAmt).isNullValue())
+ return Op0;
+ }
+
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
@@ -1687,32 +1781,8 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return Op1;
}
- if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
- if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
- if (Value *V = SimplifyAndOfICmps(ICILHS, ICIRHS))
- return V;
- if (Value *V = SimplifyAndOfICmps(ICIRHS, ICILHS))
- return V;
- }
- }
-
- // The compares may be hidden behind casts. Look through those and try the
- // same folds as above.
- auto *Cast0 = dyn_cast<CastInst>(Op0);
- auto *Cast1 = dyn_cast<CastInst>(Op1);
- if (Cast0 && Cast1 && Cast0->getOpcode() == Cast1->getOpcode() &&
- Cast0->getSrcTy() == Cast1->getSrcTy()) {
- auto *Cmp0 = dyn_cast<ICmpInst>(Cast0->getOperand(0));
- auto *Cmp1 = dyn_cast<ICmpInst>(Cast1->getOperand(0));
- if (Cmp0 && Cmp1) {
- Instruction::CastOps CastOpc = Cast0->getOpcode();
- Type *ResultType = Cast0->getType();
- if (auto *V = dyn_cast_or_null<Constant>(SimplifyAndOfICmps(Cmp0, Cmp1)))
- return ConstantExpr::getCast(CastOpc, V, ResultType);
- if (auto *V = dyn_cast_or_null<Constant>(SimplifyAndOfICmps(Cmp1, Cmp0)))
- return ConstantExpr::getCast(CastOpc, V, ResultType);
- }
- }
+ if (Value *V = simplifyAndOrOfICmps(Op0, Op1, true))
+ return V;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
@@ -1746,105 +1816,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
-}
-
-/// Commuted variants are assumed to be handled by calling this function again
-/// with the parameters swapped.
-static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
- ICmpInst::Predicate Pred0, Pred1;
- Value *A ,*B;
- if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
- !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
- return nullptr;
-
- // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
- // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
- // can eliminate Op0 from this 'or'.
- if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
- return Op1;
-
- // Check for any combination of predicates that cover the entire range of
- // possibilities.
- if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
- (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
- (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
- (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
- return getTrue(Op0->getType());
-
- return nullptr;
-}
-
-/// Commuted variants are assumed to be handled by calling this function again
-/// with the parameters swapped.
-static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
- return X;
-
- if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
- return X;
-
- // (icmp (add V, C0), C1) | (icmp V, C0)
- ICmpInst::Predicate Pred0, Pred1;
- const APInt *C0, *C1;
- Value *V;
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
- return nullptr;
-
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
- return nullptr;
-
- auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
- if (AddInst->getOperand(1) != Op1->getOperand(1))
- return nullptr;
-
- Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
-
- const APInt Delta = *C1 - *C0;
- if (C0->isStrictlyPositive()) {
- if (Delta == 2) {
- if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
- return getTrue(ITy);
- if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
- return getTrue(ITy);
- }
- if (Delta == 1) {
- if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
- return getTrue(ITy);
- if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
- return getTrue(ITy);
- }
- }
- if (C0->getBoolValue() && isNUW) {
- if (Delta == 2)
- if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
- return getTrue(ITy);
- if (Delta == 1)
- if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
- return getTrue(ITy);
- }
-
- return nullptr;
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Or, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
+ return C;
// X | undef -> -1
if (match(Op1, m_Undef()))
@@ -1868,34 +1849,61 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
return Constant::getAllOnesValue(Op0->getType());
// (A & ?) | A = A
- Value *A = nullptr, *B = nullptr;
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- (A == Op1 || B == Op1))
+ if (match(Op0, m_c_And(m_Specific(Op1), m_Value())))
return Op1;
// A | (A & ?) = A
- if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
- (A == Op0 || B == Op0))
+ if (match(Op1, m_c_And(m_Specific(Op0), m_Value())))
return Op0;
// ~(A & ?) | A = -1
- if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
- (A == Op1 || B == Op1))
+ if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
return Constant::getAllOnesValue(Op1->getType());
// A | ~(A & ?) = -1
- if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
- (A == Op0 || B == Op0))
+ if (match(Op1, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
return Constant::getAllOnesValue(Op0->getType());
- if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
- if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
- if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS))
- return V;
- if (Value *V = SimplifyOrOfICmps(ICIRHS, ICILHS))
- return V;
- }
- }
+ Value *A, *B;
+ // (A & ~B) | (A ^ B) -> (A ^ B)
+ // (~B & A) | (A ^ B) -> (A ^ B)
+ // (A & ~B) | (B ^ A) -> (B ^ A)
+ // (~B & A) | (B ^ A) -> (B ^ A)
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op1;
+
+ // Commute the 'or' operands.
+ // (A ^ B) | (A & ~B) -> (A ^ B)
+ // (A ^ B) | (~B & A) -> (A ^ B)
+ // (B ^ A) | (A & ~B) -> (B ^ A)
+ // (B ^ A) | (~B & A) -> (B ^ A)
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op0;
+
+ // (A & B) | (~A ^ B) -> (~A ^ B)
+ // (B & A) | (~A ^ B) -> (~A ^ B)
+ // (A & B) | (B ^ ~A) -> (B ^ ~A)
+ // (B & A) | (B ^ ~A) -> (B ^ ~A)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op1;
+
+ // (~A ^ B) | (A & B) -> (~A ^ B)
+ // (~A ^ B) | (B & A) -> (~A ^ B)
+ // (B ^ ~A) | (A & B) -> (B ^ ~A)
+ // (B ^ ~A) | (B & A) -> (B ^ ~A)
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op0;
+
+ if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false))
+ return V;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
@@ -1914,37 +1922,27 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
MaxRecurse))
return V;
- // (A & C)|(B & D)
- Value *C = nullptr, *D = nullptr;
- if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
- match(Op1, m_And(m_Value(B), m_Value(D)))) {
- ConstantInt *C1 = dyn_cast<ConstantInt>(C);
- ConstantInt *C2 = dyn_cast<ConstantInt>(D);
- if (C1 && C2 && (C1->getValue() == ~C2->getValue())) {
+ // (A & C1)|(B & C2)
+ const APInt *C1, *C2;
+ if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
+ match(Op1, m_And(m_Value(B), m_APInt(C2)))) {
+ if (*C1 == ~*C2) {
// (A & C1)|(B & C2)
// If we have: ((V + N) & C1) | (V & C2)
// .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
// replace with V+N.
- Value *V1, *V2;
- if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
- match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ Value *N;
+ if (C2->isMask() && // C2 == 0+1+
+ match(A, m_c_Add(m_Specific(B), m_Value(N)))) {
// Add commutes, try both ways.
- if (V1 == B &&
- MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return A;
- if (V2 == B &&
- MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
}
// Or commutes, try both ways.
- if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
- match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ if (C1->isMask() &&
+ match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
// Add commutes, try both ways.
- if (V1 == A &&
- MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return B;
- if (V2 == A &&
- MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
}
}
@@ -1959,25 +1957,16 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a Xor, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Xor, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
+ return C;
// A ^ undef -> undef
if (match(Op1, m_Undef()))
@@ -2013,14 +2002,11 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit);
}
+
static Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
@@ -2254,34 +2240,55 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
/// Fold an icmp when its operands have i1 scalar type.
static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q) {
+ Value *RHS, const SimplifyQuery &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
- if (!OpTy->getScalarType()->isIntegerTy(1))
+ if (!OpTy->isIntOrIntVectorTy(1))
return nullptr;
- switch (Pred) {
- default:
- break;
- case ICmpInst::ICMP_EQ:
- // X == 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_NE:
- // X != 0 -> X
- if (match(RHS, m_Zero()))
+ // A boolean compared to true/false can be simplified in 14 out of the 20
+ // (10 predicates * 2 constants) possible combinations. Cases not handled here
+ // require a 'not' of the LHS, so those must be transformed in InstCombine.
+ if (match(RHS, m_Zero())) {
+ switch (Pred) {
+ case CmpInst::ICMP_NE: // X != 0 -> X
+ case CmpInst::ICMP_UGT: // X >u 0 -> X
+ case CmpInst::ICMP_SLT: // X <s 0 -> X
return LHS;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 0 -> X
- if (match(RHS, m_Zero()))
+
+ case CmpInst::ICMP_ULT: // X <u 0 -> false
+ case CmpInst::ICMP_SGT: // X >s 0 -> false
+ return getFalse(ITy);
+
+ case CmpInst::ICMP_UGE: // X >=u 0 -> true
+ case CmpInst::ICMP_SLE: // X <=s 0 -> true
+ return getTrue(ITy);
+
+ default: break;
+ }
+ } else if (match(RHS, m_One())) {
+ switch (Pred) {
+ case CmpInst::ICMP_EQ: // X == 1 -> X
+ case CmpInst::ICMP_UGE: // X >=u 1 -> X
+ case CmpInst::ICMP_SLE: // X <=s -1 -> X
return LHS;
+
+ case CmpInst::ICMP_UGT: // X >u 1 -> false
+ case CmpInst::ICMP_SLT: // X <s -1 -> false
+ return getFalse(ITy);
+
+ case CmpInst::ICMP_ULE: // X <=u 1 -> true
+ case CmpInst::ICMP_SGE: // X >=s -1 -> true
+ return getTrue(ITy);
+
+ default: break;
+ }
+ }
+
+ switch (Pred) {
+ default:
break;
case ICmpInst::ICMP_UGE:
- // X >=u 1 -> X
- if (match(RHS, m_One()))
- return LHS;
if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
@@ -2296,16 +2303,6 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
- case ICmpInst::ICMP_SLT:
- // X <s 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_SLE:
- // X <=s -1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
case ICmpInst::ICMP_ULE:
if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
return getTrue(ITy);
@@ -2317,12 +2314,11 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
/// Try hard to fold icmp with zero RHS because this is a common case.
static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q) {
+ Value *RHS, const SimplifyQuery &Q) {
if (!match(RHS, m_Zero()))
return nullptr;
Type *ITy = GetCompareTy(LHS); // The return type.
- bool LHSKnownNonNegative, LHSKnownNegative;
switch (Pred) {
default:
llvm_unreachable("Unknown ICmp predicate!");
@@ -2340,43 +2336,202 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
- case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ case ICmpInst::ICMP_SLT: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getTrue(ITy);
- if (LHSKnownNonNegative)
+ if (LHSKnown.isNonNegative())
return getFalse(ITy);
break;
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ }
+ case ICmpInst::ICMP_SLE: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (LHSKnown.isNonNegative() &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ }
+ case ICmpInst::ICMP_SGE: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getFalse(ITy);
- if (LHSKnownNonNegative)
+ if (LHSKnown.isNonNegative())
return getTrue(ITy);
break;
- case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ }
+ case ICmpInst::ICMP_SGT: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (LHSKnown.isNonNegative() &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
}
+ }
return nullptr;
}
+/// Many binary operators with a constant operand have an easy-to-compute
+/// range of outputs. This can be used to fold a comparison to always true or
+/// always false.
+static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ // FIXME: If we have both nuw and nsw, we should reduce the range further.
+ if (BO.hasNoUnsignedWrap()) {
+ // 'add nuw x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ } else if (BO.hasNoSignedWrap()) {
+ if (C->isNegative()) {
+ // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::And:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'and x, C' produces [0, C].
+ Upper = *C + 1;
+ break;
+
+ case Instruction::Or:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'or x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ break;
+
+ case Instruction::AShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
+ Lower = APInt::getSignedMinValue(Width).ashr(*C);
+ Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ unsigned ShiftAmount = Width - 1;
+ if (!C->isNullValue() && BO.isExact())
+ ShiftAmount = C->countTrailingZeros();
+ if (C->isNegative()) {
+ // 'ashr C, x' produces [C, C >> (Width-1)]
+ Lower = *C;
+ Upper = C->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C, x' produces [C >> (Width-1), C]
+ Lower = C->ashr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ }
+ break;
+
+ case Instruction::LShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'lshr x, C' produces [0, UINT_MAX >> C].
+ Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'lshr C, x' produces [C >> (Width-1), C].
+ unsigned ShiftAmount = Width - 1;
+ if (!C->isNullValue() && BO.isExact())
+ ShiftAmount = C->countTrailingZeros();
+ Lower = C->lshr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::Shl:
+ if (match(BO.getOperand(0), m_APInt(C))) {
+ if (BO.hasNoUnsignedWrap()) {
+ // 'shl nuw C, x' produces [C, C << CLZ(C)]
+ Lower = *C;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
+ if (C->isNegative()) {
+ // 'shl nsw C, x' produces [C << CLO(C)-1, C]
+ unsigned ShiftAmount = C->countLeadingOnes() - 1;
+ Lower = C->shl(ShiftAmount);
+ Upper = *C + 1;
+ } else {
+ // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
+ unsigned ShiftAmount = C->countLeadingZeros() - 1;
+ Lower = *C;
+ Upper = C->shl(ShiftAmount) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::SDiv:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C->isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+ // where C != -1 and C != 0 and C != 1
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
+ // where C != -1 and C != 0 and C != 1
+ Lower = IntMin.sdiv(*C);
+ Upper = IntMax.sdiv(*C);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
+ }
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ if (C->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C, x' produces [-|C|, |C|].
+ Upper = C->abs() + 1;
+ Lower = (-Upper) + 1;
+ }
+ }
+ break;
+
+ case Instruction::UDiv:
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ // 'udiv x, C' produces [0, UINT_MAX / C].
+ Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'udiv C, x' produces [0, C].
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::SRem:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ // 'srem x, C' produces (-|C|, |C|).
+ Upper = C->abs();
+ Lower = (-Upper) + 1;
+ }
+ break;
+
+ case Instruction::URem:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'urem x, C' produces [0, C).
+ Upper = *C;
+ break;
+
+ default:
+ break;
+ }
+}
+
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS) {
const APInt *C;
@@ -2390,114 +2545,12 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
if (RHS_CR.isFullSet())
return ConstantInt::getTrue(GetCompareTy(RHS));
- // Many binary operators with constant RHS have easy to compute constant
- // range. Use them to check whether the comparison is a tautology.
+ // Find the range of possible values for binary operators.
unsigned Width = C->getBitWidth();
APInt Lower = APInt(Width, 0);
APInt Upper = APInt(Width, 0);
- const APInt *C2;
- if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) {
- // 'urem x, C2' produces [0, C2).
- Upper = *C2;
- } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) {
- // 'srem x, C2' produces (-|C2|, |C2|).
- Upper = C2->abs();
- Lower = (-Upper) + 1;
- } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) {
- // 'udiv C2, x' produces [0, C2].
- Upper = *C2 + 1;
- } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) {
- // 'udiv x, C2' produces [0, UINT_MAX / C2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (*C2 != 0)
- Upper = NegOne.udiv(*C2) + 1;
- } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) {
- if (C2->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = *C2;
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv C2, x' produces [-|C2|, |C2|].
- Upper = C2->abs() + 1;
- Lower = (-Upper) + 1;
- }
- } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C2->isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where C2 != -1 and C2 != 0 and C2 != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (C2->countLeadingZeros() < Width - 1) {
- // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2]
- // where C2 != -1 and C2 != 0 and C2 != 1
- Lower = IntMin.sdiv(*C2);
- Upper = IntMax.sdiv(*C2);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) {
- // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)]
- Lower = *C2;
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) {
- if (C2->isNegative()) {
- // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2]
- unsigned ShiftAmount = C2->countLeadingOnes() - 1;
- Lower = C2->shl(ShiftAmount);
- Upper = *C2 + 1;
- } else {
- // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1]
- unsigned ShiftAmount = C2->countLeadingZeros() - 1;
- Lower = *C2;
- Upper = C2->shl(ShiftAmount) + 1;
- }
- } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) {
- // 'lshr x, C2' produces [0, UINT_MAX >> C2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (C2->ult(Width))
- Upper = NegOne.lshr(*C2) + 1;
- } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) {
- // 'lshr C2, x' produces [C2 >> (Width-1), C2].
- unsigned ShiftAmount = Width - 1;
- if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = C2->countTrailingZeros();
- Lower = C2->lshr(ShiftAmount);
- Upper = *C2 + 1;
- } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) {
- // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2].
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C2->ult(Width)) {
- Lower = IntMin.ashr(*C2);
- Upper = IntMax.ashr(*C2) + 1;
- }
- } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) {
- unsigned ShiftAmount = Width - 1;
- if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = C2->countTrailingZeros();
- if (C2->isNegative()) {
- // 'ashr C2, x' produces [C2, C2 >> (Width-1)]
- Lower = *C2;
- Upper = C2->ashr(ShiftAmount) + 1;
- } else {
- // 'ashr C2, x' produces [C2 >> (Width-1), C2]
- Lower = C2->ashr(ShiftAmount);
- Upper = *C2 + 1;
- }
- } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) {
- // 'or x, C2' produces [C2, UINT_MAX].
- Lower = *C2;
- } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) {
- // 'and x, C2' produces [0, C2].
- Upper = *C2 + 1;
- } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) {
- // 'add nuw x, C2' produces [C2, UINT_MAX].
- Lower = *C2;
- }
+ if (auto *BO = dyn_cast<BinaryOperator>(LHS))
+ setLimitsForBinOp(*BO, Lower, Upper);
ConstantRange LHS_CR =
Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
@@ -2516,8 +2569,11 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
+/// TODO: A large part of this logic is duplicated in InstCombine's
+/// foldICmpBinOp(). We should be able to share that and avoid the code
+/// duplication.
static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
Type *ITy = GetCompareTy(LHS); // The return type.
@@ -2597,15 +2653,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
- bool RHSKnownNonNegative, RHSKnownNegative;
- bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, Q.DL, 0,
- Q.AC, Q.CxtI, Q.DT);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (RHSKnownNonNegative && YKnownNegative)
+ KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (RHSKnown.isNonNegative() && YKnown.isNegative())
return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy);
- if (RHSKnownNegative || YKnownNonNegative)
+ if (RHSKnown.isNegative() || YKnown.isNonNegative())
return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy);
}
}
@@ -2617,31 +2669,25 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0,
- Q.AC, Q.CxtI, Q.DT);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNonNegative && YKnownNegative)
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNonNegative() && YKnown.isNegative())
return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy);
- if (LHSKnownNegative || YKnownNonNegative)
+ if (LHSKnown.isNegative() || YKnown.isNonNegative())
return Pred == ICmpInst::ICMP_SGT ? getFalse(ITy) : getTrue(ITy);
}
}
}
// icmp pred (and X, Y), X
- if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)),
- m_And(m_Specific(RHS), m_Value())))) {
+ if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) {
if (Pred == ICmpInst::ICMP_UGT)
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_ULE)
return getTrue(ITy);
}
// icmp pred X, (and X, Y)
- if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)),
- m_And(m_Specific(LHS), m_Value())))) {
+ if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) {
if (Pred == ICmpInst::ICMP_UGE)
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_ULT)
@@ -2672,28 +2718,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp pred (urem X, Y), Y
if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
- bool KnownNonNegative, KnownNegative;
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SGE: {
+ KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SLE: {
+ KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2703,28 +2748,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp pred X, (urem Y, X)
if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
- bool KnownNonNegative, KnownNegative;
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SGE: {
+ KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SLE: {
+ KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2773,14 +2817,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// - CI2 is one
// - CI isn't zero
if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
- *CI2Val == 1 || !CI->isZero()) {
+ CI2Val->isOneValue() || !CI->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(RHS->getContext());
if (Pred == ICmpInst::ICMP_NE)
return ConstantInt::getTrue(RHS->getContext());
}
}
- if (CIVal->isSignBit() && *CI2Val == 1) {
+ if (CIVal->isSignMask() && CI2Val->isOneValue()) {
if (Pred == ICmpInst::ICMP_UGT)
return ConstantInt::getFalse(RHS->getContext());
if (Pred == ICmpInst::ICMP_ULE)
@@ -2796,10 +2840,19 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
break;
case Instruction::UDiv:
case Instruction::LShr:
- if (ICmpInst::isSigned(Pred))
+ if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact())
break;
- LLVM_FALLTHROUGH;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse - 1))
+ return V;
+ break;
case Instruction::SDiv:
+ if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse - 1))
+ return V;
+ break;
case Instruction::AShr:
if (!LBO->isExact() || !RBO->isExact())
break;
@@ -2827,7 +2880,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
Type *ITy = GetCompareTy(LHS); // The return type.
Value *A, *B;
@@ -3031,7 +3084,7 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
/// Given operands for an ICmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
@@ -3064,8 +3117,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If both operands have range metadata, use the metadata
// to simplify the comparison.
if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
- auto RHS_Instr = dyn_cast<Instruction>(RHS);
- auto LHS_Instr = dyn_cast<Instruction>(LHS);
+ auto RHS_Instr = cast<Instruction>(RHS);
+ auto LHS_Instr = cast<Instruction>(LHS);
if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
LHS_Instr->getMetadata(LLVMContext::MD_range)) {
@@ -3245,11 +3298,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// icmp eq|ne X, Y -> false|true if X != Y
- if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ if (ICmpInst::isEquality(Pred) &&
isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
- LLVMContext &Ctx = LHS->getType()->getContext();
- return Pred == ICmpInst::ICMP_NE ?
- ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy);
}
if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse))
@@ -3297,22 +3348,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
- // If a bit is known to be zero for A and known to be one for B,
- // then A and B cannot be equal.
- if (ICmpInst::isEquality(Pred)) {
- const APInt *RHSVal;
- if (match(RHS, m_APInt(RHSVal))) {
- unsigned BitWidth = RHSVal->getBitWidth();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
- Q.CxtI, Q.DT);
- if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0))
- return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
- : ConstantInt::getTrue(ITy);
- }
- }
-
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -3329,18 +3364,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
}
/// Given operands for an FCmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- FastMathFlags FMF, const Query &Q,
+ FastMathFlags FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
@@ -3462,22 +3493,22 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- FastMathFlags FMF, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF,
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ FastMathFlags FMF, const SimplifyQuery &Q) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
}
/// See if V simplifies when its operand Op is replaced with RepOp.
static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const Query &Q,
+ const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Trivial replacement.
if (V == Op)
return RepOp;
+ // We cannot replace a constant, and shouldn't even try.
+ if (isa<Constant>(Op))
+ return nullptr;
+
auto *I = dyn_cast<Instruction>(V);
if (!I)
return nullptr;
@@ -3620,7 +3651,7 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal,
/// Try to simplify a select instruction when its condition operand is an
/// integer comparison.
static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
- Value *FalseVal, const Query &Q,
+ Value *FalseVal, const SimplifyQuery &Q,
unsigned MaxRecurse) {
ICmpInst::Predicate Pred;
Value *CmpLHS, *CmpRHS;
@@ -3699,7 +3730,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
- Value *FalseVal, const Query &Q,
+ Value *FalseVal, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// select true, X, Y -> X
// select false, X, Y -> Y
@@ -3715,9 +3746,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
return TrueVal;
if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
- if (isa<Constant>(TrueVal))
- return TrueVal;
- return FalseVal;
+ if (isa<Constant>(FalseVal))
+ return FalseVal;
+ return TrueVal;
}
if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
return FalseVal;
@@ -3732,18 +3763,14 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
}
/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
- const Query &Q, unsigned) {
+ const SimplifyQuery &Q, unsigned) {
// The type of the GEP pointer operand.
unsigned AS =
cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
@@ -3757,6 +3784,8 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+ else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
+ GEPTy = VectorType::get(GEPTy, VT->getNumElements());
if (isa<UndefValue>(Ops[0]))
return UndefValue::get(GEPTy);
@@ -3842,27 +3871,25 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
}
// Check to see if this is constant foldable.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (!isa<Constant>(Ops[i]))
- return nullptr;
+ if (!all_of(Ops, [](Value *V) { return isa<Constant>(V); }))
+ return nullptr;
- return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
- Ops.slice(1));
+ auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+ Ops.slice(1));
+ if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL))
+ return CEFolded;
+ return CE;
}
Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyGEPInst(SrcTy, Ops,
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit);
}
/// Given operands for an InsertValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs, const Query &Q,
+ ArrayRef<unsigned> Idxs, const SimplifyQuery &Q,
unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
@@ -3888,18 +3915,16 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
return nullptr;
}
-Value *llvm::SimplifyInsertValueInst(
- Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
- const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const SimplifyQuery &Q) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
}
/// Given operands for an ExtractValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
- const Query &, unsigned) {
+ const SimplifyQuery &, unsigned) {
if (auto *CAgg = dyn_cast<Constant>(Agg))
return ConstantFoldExtractValueInstruction(CAgg, Idxs);
@@ -3922,18 +3947,13 @@ static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
}
Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
}
/// Given operands for an ExtractElementInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
+static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &,
unsigned) {
if (auto *CVec = dyn_cast<Constant>(Vec)) {
if (auto *CIdx = dyn_cast<Constant>(Idx))
@@ -3956,15 +3976,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
return nullptr;
}
-Value *llvm::SimplifyExtractElementInst(
- Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) {
- return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
+ const SimplifyQuery &Q) {
+ return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
}
/// See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
+static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = nullptr;
@@ -3997,7 +4015,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
}
static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
- Type *Ty, const Query &Q, unsigned MaxRecurse) {
+ Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) {
if (auto *C = dyn_cast<Constant>(Op))
return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
@@ -4031,12 +4049,141 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
}
Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
+}
+
+/// For the given destination element of a shuffle, peek through shuffles to
+/// match a root vector source operand that contains that element in the same
+/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
+static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
+ int MaskVal, Value *RootVec,
+ unsigned MaxRecurse) {
+ if (!MaxRecurse--)
+ return nullptr;
+
+ // Bail out if any mask value is undefined. That kind of shuffle may be
+ // simplified further based on demanded bits or other folds.
+ if (MaskVal == -1)
+ return nullptr;
+
+ // The mask value chooses which source operand we need to look at next.
+ int InVecNumElts = Op0->getType()->getVectorNumElements();
+ int RootElt = MaskVal;
+ Value *SourceOp = Op0;
+ if (MaskVal >= InVecNumElts) {
+ RootElt = MaskVal - InVecNumElts;
+ SourceOp = Op1;
+ }
+
+ // If the source operand is a shuffle itself, look through it to find the
+ // matching root vector.
+ if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
+ return foldIdentityShuffles(
+ DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
+ SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse);
+ }
+
+ // TODO: Look through bitcasts? What if the bitcast changes the vector element
+ // size?
+
+ // The source operand is not a shuffle. Initialize the root vector value for
+ // this shuffle if that has not been done yet.
+ if (!RootVec)
+ RootVec = SourceOp;
+
+ // Give up as soon as a source operand does not match the existing root value.
+ if (RootVec != SourceOp)
+ return nullptr;
+
+ // The element must be coming from the same lane in the source vector
+ // (although it may have crossed lanes in intermediate shuffles).
+ if (RootElt != DestElt)
+ return nullptr;
+
+ return RootVec;
+}
+
+static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
+ Type *RetTy, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ if (isa<UndefValue>(Mask))
+ return UndefValue::get(RetTy);
+
+ Type *InVecTy = Op0->getType();
+ unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+ unsigned InVecNumElts = InVecTy->getVectorNumElements();
+
+ SmallVector<int, 32> Indices;
+ ShuffleVectorInst::getShuffleMask(Mask, Indices);
+ assert(MaskNumElts == Indices.size() &&
+ "Size of Indices not same as number of mask elements?");
+
+ // Canonicalization: If mask does not select elements from an input vector,
+ // replace that input vector with undef.
+ bool MaskSelects0 = false, MaskSelects1 = false;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (Indices[i] == -1)
+ continue;
+ if ((unsigned)Indices[i] < InVecNumElts)
+ MaskSelects0 = true;
+ else
+ MaskSelects1 = true;
+ }
+ if (!MaskSelects0)
+ Op0 = UndefValue::get(InVecTy);
+ if (!MaskSelects1)
+ Op1 = UndefValue::get(InVecTy);
+
+ auto *Op0Const = dyn_cast<Constant>(Op0);
+ auto *Op1Const = dyn_cast<Constant>(Op1);
+
+ // If all operands are constant, constant fold the shuffle.
+ if (Op0Const && Op1Const)
+ return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
+
+ // Canonicalization: if only one input vector is constant, it shall be the
+ // second one.
+ if (Op0Const && !Op1Const) {
+ std::swap(Op0, Op1);
+ ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
+ }
+
+ // A shuffle of a splat is always the splat itself. Legal if the shuffle's
+ // value type is same as the input vectors' type.
+ if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
+ if (isa<UndefValue>(Op1) && RetTy == InVecTy &&
+ OpShuf->getMask()->getSplatValue())
+ return Op0;
+
+ // Don't fold a shuffle with undef mask elements. This may get folded in a
+ // better way using demanded bits or other analysis.
+ // TODO: Should we allow this?
+ if (find(Indices, -1) != Indices.end())
+ return nullptr;
+
+ // Check if every element of this shuffle can be mapped back to the
+ // corresponding element of a single root vector. If so, we don't need this
+ // shuffle. This handles simple identity shuffles as well as chains of
+ // shuffles that may widen/narrow and/or move elements across lanes and back.
+ Value *RootVec = nullptr;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ // Note that recursion is limited for each vector element, so if any element
+ // exceeds the limit, this will fail to simplify.
+ RootVec =
+ foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse);
+
+ // We can't replace a widening/narrowing shuffle with one of its operands.
+ if (!RootVec || RootVec->getType() != RetTy)
+ return nullptr;
+ }
+ return RootVec;
+}
+
+/// Given operands for a ShuffleVectorInst, fold the result or return null.
+Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
+ Type *RetTy, const SimplifyQuery &Q) {
+ return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -4044,64 +4191,46 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
- return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- Q, MaxRecurse);
+ return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::FAdd:
return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
-
case Instruction::Sub:
- return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- Q, MaxRecurse);
+ return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::FSub:
return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
-
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Mul:
+ return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FMul:
- return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SDiv:
+ return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv:
+ return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FDiv:
- return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SRem:
+ return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem:
+ return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FRem:
- return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Shl:
- return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- Q, MaxRecurse);
+ return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
+ case Instruction::And:
+ return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or:
+ return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor:
+ return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
default:
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
-
- // If the operation is associative, try some generic simplifications.
- if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
- return V;
-
- // If the operation is with the result of a select instruction check whether
- // operating on either branch of the select always yields the same value.
- if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
- return V;
-
- // If the operation is with the result of a phi instruction, check whether
- // operating on all incoming values of the phi always yields the same value.
- if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
- return V;
-
- return nullptr;
+ llvm_unreachable("Unexpected opcode");
}
}
@@ -4110,7 +4239,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const Query &Q,
+ const FastMathFlags &FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FAdd:
@@ -4127,36 +4256,26 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
}
Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ FastMathFlags FMF, const SimplifyQuery &Q) {
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
}
/// Given operands for a CmpInst, see if we can fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ const SimplifyQuery &Q) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
}
static bool IsIdempotent(Intrinsic::ID ID) {
@@ -4249,7 +4368,7 @@ static bool maskIsAllZeroOrUndef(Value *Mask) {
template <typename IterTy>
static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
Intrinsic::ID IID = F->getIntrinsicID();
unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
@@ -4267,6 +4386,7 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
case Intrinsic::fabs: {
if (SignBitMustBeZero(*ArgBegin, Q.TLI))
return *ArgBegin;
+ return nullptr;
}
default:
return nullptr;
@@ -4296,19 +4416,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow: {
// X + undef -> undef
- if (isa<UndefValue>(RHS))
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
return UndefValue::get(ReturnType);
return nullptr;
}
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow: {
+ // 0 * X -> { 0, false }
// X * 0 -> { 0, false }
- if (match(RHS, m_Zero()))
+ if (match(LHS, m_Zero()) || match(RHS, m_Zero()))
return Constant::getNullValue(ReturnType);
+ // undef * X -> { 0, false }
// X * undef -> { 0, false }
- if (match(RHS, m_Undef()))
+ if (match(LHS, m_Undef()) || match(RHS, m_Undef()))
return Constant::getNullValue(ReturnType);
return nullptr;
@@ -4341,8 +4463,9 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
template <typename IterTy>
-static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
- const Query &Q, unsigned MaxRecurse) {
+static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
+ IterTy ArgEnd, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
Type *Ty = V->getType();
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
Ty = PTy->getElementType();
@@ -4361,7 +4484,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
return Ret;
- if (!canConstantFoldCallTo(F))
+ if (!canConstantFoldCallTo(CS, F))
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
@@ -4373,181 +4496,170 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
ConstantArgs.push_back(C);
}
- return ConstantFoldCall(F, ConstantArgs, Q.TLI);
+ return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI);
}
-Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
- User::op_iterator ArgEnd, const DataLayout &DL,
- const TargetLibraryInfo *TLI, const DominatorTree *DT,
- AssumptionCache *AC, const Instruction *CxtI) {
- return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
+ User::op_iterator ArgBegin, User::op_iterator ArgEnd,
+ const SimplifyQuery &Q) {
+ return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit);
}
-Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyCall(V, Args.begin(), Args.end(),
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
+ ArrayRef<Value *> Args, const SimplifyQuery &Q) {
+ return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
}
/// See if we can compute a simplified version of this instruction.
/// If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC) {
+
+Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
+ OptimizationRemarkEmitter *ORE) {
+ const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I);
Value *Result;
switch (I->getOpcode()) {
default:
- Result = ConstantFoldInstruction(I, DL, TLI);
+ Result = ConstantFoldInstruction(I, Q.DL, Q.TLI);
break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
- TLI, DT, AC, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
- TLI, DT, AC, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Mul:
- Result =
- SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::FDiv:
Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::FRem:
Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
- TLI, DT, AC, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
- AC, I);
+ cast<BinaryOperator>(I)->isExact(), Q);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
- AC, I);
+ cast<BinaryOperator>(I)->isExact(), Q);
break;
case Instruction::And:
- Result =
- SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::Or:
- Result =
- SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::Xor:
- Result =
- SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::ICmp:
- Result =
- SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0),
- I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::FCmp:
- Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ Result =
+ SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), I->getFastMathFlags(), Q);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), DL, TLI, DT, AC, I);
+ I->getOperand(2), Q);
break;
case Instruction::GetElementPtr: {
- SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ SmallVector<Value *, 8> Ops(I->op_begin(), I->op_end());
Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(),
- Ops, DL, TLI, DT, AC, I);
+ Ops, Q);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), DL, TLI, DT, AC, I);
+ IV->getIndices(), Q);
break;
}
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
- EVI->getIndices(), DL, TLI, DT, AC, I);
+ EVI->getIndices(), Q);
break;
}
case Instruction::ExtractElement: {
auto *EEI = cast<ExtractElementInst>(I);
- Result = SimplifyExtractElementInst(
- EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I);
+ Result = SimplifyExtractElementInst(EEI->getVectorOperand(),
+ EEI->getIndexOperand(), Q);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ auto *SVI = cast<ShuffleVectorInst>(I);
+ Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
+ SVI->getMask(), SVI->getType(), Q);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
+ Result = SimplifyPHINode(cast<PHINode>(I), Q);
break;
case Instruction::Call: {
CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
- TLI, DT, AC, I);
+ Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+ Q);
break;
}
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
#include "llvm/IR/Instruction.def"
#undef HANDLE_CAST_INST
- Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(),
- DL, TLI, DT, AC, I);
+ Result =
+ SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q);
+ break;
+ case Instruction::Alloca:
+ // No simplifications for Alloca and it can't be constant folded.
+ Result = nullptr;
break;
}
// In general, it is possible for computeKnownBits to determine all bits in a
// value even when the operands are not all constants.
if (!Result && I->getType()->isIntOrIntVectorTy()) {
- unsigned BitWidth = I->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
- if ((KnownZero | KnownOne).isAllOnesValue())
- Result = ConstantInt::get(I->getType(), KnownOne);
+ KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE);
+ if (Known.isConstant())
+ Result = ConstantInt::get(I->getType(), Known.getConstant());
}
/// If called on unreachable code, the above logic may report that the
@@ -4599,7 +4711,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
I = Worklist[Idx];
// See if this instruction simplifies.
- SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC);
+ SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
if (!SimpleV)
continue;
@@ -4638,3 +4750,31 @@ bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
assert(SimpleV && "Must provide a simplified value.");
return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
}
+
+namespace llvm {
+const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) {
+ auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>();
+ auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
+ return {F.getParent()->getDataLayout(), TLI, DT, AC};
+}
+
+const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR,
+ const DataLayout &DL) {
+ return {DL, &AR.TLI, &AR.DT, &AR.AC};
+}
+
+template <class T, class... TArgs>
+const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM,
+ Function &F) {
+ auto *DT = AM.template getCachedResult<DominatorTreeAnalysis>(F);
+ auto *TLI = AM.template getCachedResult<TargetLibraryAnalysis>(F);
+ auto *AC = AM.template getCachedResult<AssumptionAnalysis>(F);
+ return {F.getParent()->getDataLayout(), TLI, DT, AC};
+}
+template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &,
+ Function &);
+}
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
index d1374acd..3992657 100644
--- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -17,17 +17,9 @@
#include <queue>
namespace llvm {
-template <class NodeTy>
-void IDFCalculator<NodeTy>::calculate(
+template <class NodeTy, bool IsPostDom>
+void IDFCalculator<NodeTy, IsPostDom>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
- // If we haven't computed dominator tree levels, do so now.
- if (DomLevels.empty()) {
- for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
- DFI != DFE; ++DFI) {
- DomLevels[*DFI] = DFI.getPathLength() - 1;
- }
- }
-
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
@@ -37,7 +29,7 @@ void IDFCalculator<NodeTy>::calculate(
for (BasicBlock *BB : *DefBlocks) {
if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push(std::make_pair(Node, DomLevels.lookup(Node)));
+ PQ.push({Node, Node->getLevel()});
}
SmallVector<DomTreeNode *, 32> Worklist;
@@ -64,10 +56,7 @@ void IDFCalculator<NodeTy>::calculate(
BasicBlock *BB = Node->getBlock();
// Succ is the successor in the direction we are calculating IDF, so it is
// successor for IDF, and predecessor for Reverse IDF.
- for (auto SuccIter = GraphTraits<NodeTy>::child_begin(BB),
- End = GraphTraits<NodeTy>::child_end(BB);
- SuccIter != End; ++SuccIter) {
- BasicBlock *Succ = *SuccIter;
+ for (auto *Succ : children<NodeTy>(BB)) {
DomTreeNode *SuccNode = DT.getNode(Succ);
// Quickly skip all CFG edges that are also dominator tree edges instead
@@ -75,7 +64,7 @@ void IDFCalculator<NodeTy>::calculate(
if (SuccNode->getIDom() == Node)
continue;
- unsigned SuccLevel = DomLevels.lookup(SuccNode);
+ const unsigned SuccLevel = SuccNode->getLevel();
if (SuccLevel > RootLevel)
continue;
@@ -99,6 +88,6 @@ void IDFCalculator<NodeTy>::calculate(
}
}
-template class IDFCalculator<BasicBlock *>;
-template class IDFCalculator<Inverse<BasicBlock *>>;
+template class IDFCalculator<BasicBlock *, false>;
+template class IDFCalculator<Inverse<BasicBlock *>, true>;
}
diff --git a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 596b6fc..a8178ec 100644
--- a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -9,7 +9,7 @@
//
// This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The
// difference is that with this pass the block frequencies are not computed when
-// the analysis pass is executed but rather when the BFI results is explicitly
+// the analysis pass is executed but rather when the BFI result is explicitly
// requested by the analysis client.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
index b51c6be..e2884d0 100644
--- a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -24,6 +25,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
"Lazy Branch Probability Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
"Lazy Branch Probability Analysis", true, true)
@@ -41,6 +43,7 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS,
void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -48,16 +51,19 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI);
+ TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
return false;
}
void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) {
AU.addRequired<LazyBranchProbabilityInfoPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass);
}
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index f7cf8c6..d287f81 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -8,36 +8,59 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "lcg"
+void LazyCallGraph::EdgeSequence::insertEdgeInternal(Node &TargetN,
+ Edge::Kind EK) {
+ EdgeIndexMap.insert({&TargetN, Edges.size()});
+ Edges.emplace_back(TargetN, EK);
+}
+
+void LazyCallGraph::EdgeSequence::setEdgeKind(Node &TargetN, Edge::Kind EK) {
+ Edges[EdgeIndexMap.find(&TargetN)->second].setKind(EK);
+}
+
+bool LazyCallGraph::EdgeSequence::removeEdgeInternal(Node &TargetN) {
+ auto IndexMapI = EdgeIndexMap.find(&TargetN);
+ if (IndexMapI == EdgeIndexMap.end())
+ return false;
+
+ Edges[IndexMapI->second] = Edge();
+ EdgeIndexMap.erase(IndexMapI);
+ return true;
+}
+
static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges,
- DenseMap<Function *, int> &EdgeIndexMap, Function &F,
- LazyCallGraph::Edge::Kind EK) {
- if (!EdgeIndexMap.insert({&F, Edges.size()}).second)
+ DenseMap<LazyCallGraph::Node *, int> &EdgeIndexMap,
+ LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK) {
+ if (!EdgeIndexMap.insert({&N, Edges.size()}).second)
return;
- DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
- Edges.emplace_back(LazyCallGraph::Edge(F, EK));
+ DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n");
+ Edges.emplace_back(LazyCallGraph::Edge(N, EK));
}
-LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
- : G(&G), F(F), DFSNumber(0), LowLink(0) {
- DEBUG(dbgs() << " Adding functions called by '" << F.getName()
+LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() {
+ assert(!Edges && "Must not have already populated the edges for this node!");
+
+ DEBUG(dbgs() << " Adding functions called by '" << getName()
<< "' to the graph.\n");
+ Edges = EdgeSequence();
+
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Function *, 4> Callees;
SmallPtrSet<Constant *, 16> Visited;
@@ -58,14 +81,15 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// alias. Then a test of the address of the weak function against the new
// strong definition's address would be an effective way to determine the
// safety of optimizing a direct call edge.
- for (BasicBlock &BB : F)
+ for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
if (!Callee->isDeclaration())
if (Callees.insert(Callee).second) {
Visited.insert(Callee);
- addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
+ addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*Callee),
+ LazyCallGraph::Edge::Call);
}
for (Value *Op : I.operand_values())
@@ -78,50 +102,59 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// function containing) operands to all of the instructions in the function.
// Process them (recursively) collecting every function found.
visitReferences(Worklist, Visited, [&](Function &F) {
- addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref);
+ addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(F),
+ LazyCallGraph::Edge::Ref);
});
-}
-void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) {
- if (Node *N = G->lookup(Target))
- return insertEdgeInternal(*N, EK);
+ // Add implicit reference edges to any defined libcall functions (if we
+ // haven't found an explicit edge).
+ for (auto *F : G->LibFunctions)
+ if (!Visited.count(F))
+ addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*F),
+ LazyCallGraph::Edge::Ref);
- EdgeIndexMap.insert({&Target, Edges.size()});
- Edges.emplace_back(Target, EK);
+ return *Edges;
}
-void LazyCallGraph::Node::insertEdgeInternal(Node &TargetN, Edge::Kind EK) {
- EdgeIndexMap.insert({&TargetN.getFunction(), Edges.size()});
- Edges.emplace_back(TargetN, EK);
+void LazyCallGraph::Node::replaceFunction(Function &NewF) {
+ assert(F != &NewF && "Must not replace a function with itself!");
+ F = &NewF;
}
-void LazyCallGraph::Node::setEdgeKind(Function &TargetF, Edge::Kind EK) {
- Edges[EdgeIndexMap.find(&TargetF)->second].setKind(EK);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const {
+ dbgs() << *this << '\n';
}
+#endif
-void LazyCallGraph::Node::removeEdgeInternal(Function &Target) {
- auto IndexMapI = EdgeIndexMap.find(&Target);
- assert(IndexMapI != EdgeIndexMap.end() &&
- "Target not in the edge set for this caller?");
+static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
+ LibFunc LF;
- Edges[IndexMapI->second] = Edge();
- EdgeIndexMap.erase(IndexMapI);
+ // Either this is a normal library function or a "vectorizable" function.
+ return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName());
}
-void LazyCallGraph::Node::dump() const {
- dbgs() << *this << '\n';
-}
-
-LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
+LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
<< "\n");
- for (Function &F : M)
- if (!F.isDeclaration() && !F.hasLocalLinkage())
- if (EntryIndexMap.insert({&F, EntryEdges.size()}).second) {
- DEBUG(dbgs() << " Adding '" << F.getName()
- << "' to entry set of the graph.\n");
- EntryEdges.emplace_back(F, Edge::Ref);
- }
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ // If this function is a known lib function to LLVM then we want to
+ // synthesize reference edges to it to model the fact that LLVM can turn
+ // arbitrary code into a library function call.
+ if (isKnownLibFunction(F, TLI))
+ LibFunctions.insert(&F);
+
+ if (F.hasLocalLinkage())
+ continue;
+
+ // External linkage defined functions have edges to them from other
+ // modules.
+ DEBUG(dbgs() << " Adding '" << F.getName()
+ << "' to entry set of the graph.\n");
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
+ }
// Now add entry nodes for functions reachable via initializers to globals.
SmallVector<Constant *, 16> Worklist;
@@ -134,21 +167,16 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
"entry set.\n");
visitReferences(Worklist, Visited, [&](Function &F) {
- addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref);
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F),
+ LazyCallGraph::Edge::Ref);
});
-
- for (const Edge &E : EntryEdges)
- RefSCCEntryNodes.push_back(&E.getFunction());
}
LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
: BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
- EntryEdges(std::move(G.EntryEdges)),
- EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)),
+ EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)),
SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)),
- DFSStack(std::move(G.DFSStack)),
- RefSCCEntryNodes(std::move(G.RefSCCEntryNodes)),
- NextDFSNumber(G.NextDFSNumber) {
+ LibFunctions(std::move(G.LibFunctions)) {
updateGraphPtrs();
}
@@ -156,20 +184,19 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
BPA = std::move(G.BPA);
NodeMap = std::move(G.NodeMap);
EntryEdges = std::move(G.EntryEdges);
- EntryIndexMap = std::move(G.EntryIndexMap);
SCCBPA = std::move(G.SCCBPA);
SCCMap = std::move(G.SCCMap);
LeafRefSCCs = std::move(G.LeafRefSCCs);
- DFSStack = std::move(G.DFSStack);
- RefSCCEntryNodes = std::move(G.RefSCCEntryNodes);
- NextDFSNumber = G.NextDFSNumber;
+ LibFunctions = std::move(G.LibFunctions);
updateGraphPtrs();
return *this;
}
-void LazyCallGraph::SCC::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LazyCallGraph::SCC::dump() const {
dbgs() << *this << '\n';
}
+#endif
#ifndef NDEBUG
void LazyCallGraph::SCC::verify() {
@@ -184,8 +211,8 @@ void LazyCallGraph::SCC::verify() {
"Must set DFS numbers to -1 when adding a node to an SCC!");
assert(N->LowLink == -1 &&
"Must set low link to -1 when adding a node to an SCC!");
- for (Edge &E : *N)
- assert(E.getNode() && "Can't have an edge to a raw function!");
+ for (Edge &E : **N)
+ assert(E.getNode() && "Can't have an unpopulated node!");
}
}
#endif
@@ -195,10 +222,9 @@ bool LazyCallGraph::SCC::isParentOf(const SCC &C) const {
return false;
for (Node &N : *this)
- for (Edge &E : N.calls())
- if (Node *CalleeN = E.getNode())
- if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C)
- return true;
+ for (Edge &E : N->calls())
+ if (OuterRefSCC->G->lookupSCC(E.getNode()) == &C)
+ return true;
// No edges found.
return false;
@@ -218,11 +244,8 @@ bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
do {
const SCC &C = *Worklist.pop_back_val();
for (Node &N : C)
- for (Edge &E : N.calls()) {
- Node *CalleeN = E.getNode();
- if (!CalleeN)
- continue;
- SCC *CalleeC = G.lookupSCC(*CalleeN);
+ for (Edge &E : N->calls()) {
+ SCC *CalleeC = G.lookupSCC(E.getNode());
if (!CalleeC)
continue;
@@ -243,9 +266,11 @@ bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {}
-void LazyCallGraph::RefSCC::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LazyCallGraph::RefSCC::dump() const {
dbgs() << *this << '\n';
}
+#endif
#ifndef NDEBUG
void LazyCallGraph::RefSCC::verify() {
@@ -279,10 +304,10 @@ void LazyCallGraph::RefSCC::verify() {
for (int i = 0, Size = SCCs.size(); i < Size; ++i) {
SCC &SourceSCC = *SCCs[i];
for (Node &N : SourceSCC)
- for (Edge &E : N) {
+ for (Edge &E : *N) {
if (!E.isCall())
continue;
- SCC &TargetSCC = *G->lookupSCC(*E.getNode());
+ SCC &TargetSCC = *G->lookupSCC(E.getNode());
if (&TargetSCC.getOuterRefSCC() == this) {
assert(SCCIndices.find(&TargetSCC)->second <= i &&
"Edge between SCCs violates post-order relationship.");
@@ -299,8 +324,8 @@ void LazyCallGraph::RefSCC::verify() {
auto HasConnectingEdge = [&] {
for (SCC &C : *ParentRC)
for (Node &N : C)
- for (Edge &E : N)
- if (G->lookupRefSCC(*E.getNode()) == this)
+ for (Edge &E : *N)
+ if (G->lookupRefSCC(E.getNode()) == this)
return true;
return false;
};
@@ -459,9 +484,11 @@ updatePostorderSequenceForEdgeInsertion(
return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
}
-SmallVector<LazyCallGraph::SCC *, 1>
-LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
- assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
+bool
+LazyCallGraph::RefSCC::switchInternalEdgeToCall(
+ Node &SourceN, Node &TargetN,
+ function_ref<void(ArrayRef<SCC *> MergeSCCs)> MergeCB) {
+ assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!");
SmallVector<SCC *, 1> DeletedSCCs;
#ifndef NDEBUG
@@ -477,8 +504,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// If the two nodes are already part of the same SCC, we're also done as
// we've just added more connectivity.
if (&SourceSCC == &TargetSCC) {
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
- return DeletedSCCs;
+ SourceN->setEdgeKind(TargetN, Edge::Call);
+ return false; // No new cycle.
}
// At this point we leverage the postorder list of SCCs to detect when the
@@ -490,8 +517,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
int SourceIdx = SCCIndices[&SourceSCC];
int TargetIdx = SCCIndices[&TargetSCC];
if (TargetIdx < SourceIdx) {
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
- return DeletedSCCs;
+ SourceN->setEdgeKind(TargetN, Edge::Call);
+ return false; // No new cycle.
}
// Compute the SCCs which (transitively) reach the source.
@@ -504,11 +531,9 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
ConnectedSet.insert(&SourceSCC);
auto IsConnected = [&](SCC &C) {
for (Node &N : C)
- for (Edge &E : N.calls()) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
- if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
+ for (Edge &E : N->calls())
+ if (ConnectedSet.count(G->lookupSCC(E.getNode())))
return true;
- }
return false;
};
@@ -535,11 +560,10 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
do {
SCC &C = *Worklist.pop_back_val();
for (Node &N : C)
- for (Edge &E : N) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
+ for (Edge &E : *N) {
if (!E.isCall())
continue;
- SCC &EdgeC = *G->lookupSCC(*E.getNode());
+ SCC &EdgeC = *G->lookupSCC(E.getNode());
if (&EdgeC.getOuterRefSCC() != this)
// Not in this RefSCC...
continue;
@@ -561,12 +585,16 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet,
ComputeTargetConnectedSet);
+ // Run the user's callback on the merged SCCs before we actually merge them.
+ if (MergeCB)
+ MergeCB(makeArrayRef(MergeRange.begin(), MergeRange.end()));
+
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
if (MergeRange.begin() == MergeRange.end()) {
// Now that the SCC structure is finalized, flip the kind to call.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
- return DeletedSCCs;
+ SourceN->setEdgeKind(TargetN, Edge::Call);
+ return false; // No new cycle.
}
#ifndef NDEBUG
@@ -600,15 +628,15 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
SCCIndices[C] -= IndexOffset;
// Now that the SCC structure is finalized, flip the kind to call.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
- // And we're done!
- return DeletedSCCs;
+ // And we're done, but we did form a new cycle.
+ return true;
}
void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
Node &TargetN) {
- assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
+ assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!");
#ifndef NDEBUG
// In a debug build, verify the RefSCC is valid to start with and when this
@@ -625,12 +653,12 @@ void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
"Source and Target must be in separate SCCs for this to be trivial!");
// Set the edge kind.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+ SourceN->setEdgeKind(TargetN, Edge::Ref);
}
iterator_range<LazyCallGraph::RefSCC::iterator>
LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
- assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
+ assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!");
#ifndef NDEBUG
// In a debug build, verify the RefSCC is valid to start with and when this
@@ -650,7 +678,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
"full CG update.");
// Set the edge kind.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+ SourceN->setEdgeKind(TargetN, Edge::Ref);
// Otherwise we are removing a call edge from a single SCC. This may break
// the cycle. In order to compute the new set of SCCs, we need to do a small
@@ -665,7 +693,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
// etc.
SCC &OldSCC = TargetSCC;
- SmallVector<std::pair<Node *, call_edge_iterator>, 16> DFSStack;
+ SmallVector<std::pair<Node *, EdgeSequence::call_iterator>, 16> DFSStack;
SmallVector<Node *, 16> PendingSCCStack;
SmallVector<SCC *, 4> NewSCCs;
@@ -706,14 +734,14 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
RootN->DFSNumber = RootN->LowLink = 1;
int NextDFSNumber = 2;
- DFSStack.push_back({RootN, RootN->call_begin()});
+ DFSStack.push_back({RootN, (*RootN)->call_begin()});
do {
Node *N;
- call_edge_iterator I;
+ EdgeSequence::call_iterator I;
std::tie(N, I) = DFSStack.pop_back_val();
- auto E = N->call_end();
+ auto E = (*N)->call_end();
while (I != E) {
- Node &ChildN = *I->getNode();
+ Node &ChildN = I->getNode();
if (ChildN.DFSNumber == 0) {
// We haven't yet visited this child, so descend, pushing the current
// node onto the stack.
@@ -723,8 +751,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
"Found a node with 0 DFS number but already in an SCC!");
ChildN.DFSNumber = ChildN.LowLink = NextDFSNumber++;
N = &ChildN;
- I = N->call_begin();
- E = N->call_end();
+ I = (*N)->call_begin();
+ E = (*N)->call_end();
continue;
}
@@ -817,17 +845,19 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
Node &TargetN) {
- assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
+ assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!");
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
assert(G->lookupRefSCC(TargetN) != this &&
"Target must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// Edges between RefSCCs are the same regardless of call or ref, so we can
// just flip the edge here.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
#ifndef NDEBUG
// Check that the RefSCC is still valid.
@@ -837,17 +867,19 @@ void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
void LazyCallGraph::RefSCC::switchOutgoingEdgeToRef(Node &SourceN,
Node &TargetN) {
- assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
+ assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!");
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
assert(G->lookupRefSCC(TargetN) != this &&
"Target must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// Edges between RefSCCs are the same regardless of call or ref, so we can
// just flip the edge here.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+ SourceN->setEdgeKind(TargetN, Edge::Ref);
#ifndef NDEBUG
// Check that the RefSCC is still valid.
@@ -860,7 +892,7 @@ void LazyCallGraph::RefSCC::insertInternalRefEdge(Node &SourceN,
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
- SourceN.insertEdgeInternal(TargetN, Edge::Ref);
+ SourceN->insertEdgeInternal(TargetN, Edge::Ref);
#ifndef NDEBUG
// Check that the RefSCC is still valid.
@@ -871,14 +903,16 @@ void LazyCallGraph::RefSCC::insertInternalRefEdge(Node &SourceN,
void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN,
Edge::Kind EK) {
// First insert it into the caller.
- SourceN.insertEdgeInternal(TargetN, EK);
+ SourceN->insertEdgeInternal(TargetN, EK);
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
RefSCC &TargetC = *G->lookupRefSCC(TargetN);
assert(&TargetC != this && "Target must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(TargetC.isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// The only change required is to add this SCC to the parent set of the
// callee.
@@ -895,8 +929,10 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
RefSCC &SourceC = *G->lookupRefSCC(SourceN);
assert(&SourceC != this && "Source must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(SourceC.isDescendantOf(*this) &&
"Source must be a descendant of the Target.");
+#endif
SmallVector<RefSCC *, 1> DeletedRefSCCs;
@@ -951,9 +987,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
RefSCC &RC = *Worklist.pop_back_val();
for (SCC &C : RC)
for (Node &N : C)
- for (Edge &E : N) {
- assert(E.getNode() && "Must have formed a node!");
- RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &EdgeRC = *G->lookupRefSCC(E.getNode());
if (G->getRefSCCIndex(EdgeRC) <= SourceIdx)
// Not in the postorder sequence between source and target.
continue;
@@ -1003,10 +1038,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
SCCIndices[&InnerC] = SCCIndex++;
for (Node &N : InnerC) {
G->SCCMap[&N] = &InnerC;
- for (Edge &E : N) {
- assert(E.getNode() &&
- "Cannot have a null node within a visited SCC!");
- RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &ChildRC = *G->lookupRefSCC(E.getNode());
if (MergeSet.count(&ChildRC))
continue;
ChildRC.Parents.erase(RC);
@@ -1042,7 +1075,7 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
// At this point we have a merged RefSCC with a post-order SCCs list, just
// connect the nodes to form the new edge.
- SourceN.insertEdgeInternal(TargetN, Edge::Ref);
+ SourceN->insertEdgeInternal(TargetN, Edge::Ref);
// We return the list of SCCs which were merged so that callers can
// invalidate any data they have associated with those SCCs. Note that these
@@ -1069,15 +1102,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
#endif
// First remove it from the node.
- SourceN.removeEdgeInternal(TargetN.getFunction());
+ bool Removed = SourceN->removeEdgeInternal(TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
bool HasOtherEdgeToChildRC = false;
bool HasOtherChildRC = false;
for (SCC *InnerC : SCCs) {
for (Node &N : *InnerC) {
- for (Edge &E : N) {
- assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
- RefSCC &OtherChildRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &OtherChildRC = *G->lookupRefSCC(E.getNode());
if (&OtherChildRC == &TargetRC) {
HasOtherEdgeToChildRC = true;
break;
@@ -1116,7 +1150,7 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
SmallVector<LazyCallGraph::RefSCC *, 1>
LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
- assert(!SourceN[TargetN].isCall() &&
+ assert(!(*SourceN)[TargetN].isCall() &&
"Cannot remove a call edge, it must first be made a ref edge");
#ifndef NDEBUG
@@ -1127,7 +1161,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
#endif
// First remove the actual edge.
- SourceN.removeEdgeInternal(TargetN.getFunction());
+ bool Removed = SourceN->removeEdgeInternal(TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
// We return a list of the resulting *new* RefSCCs in post-order.
SmallVector<RefSCC *, 1> Result;
@@ -1186,7 +1222,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
PostOrderMapping[&N] = Number;
};
- SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack;
+ SmallVector<std::pair<Node *, EdgeSequence::iterator>, 4> DFSStack;
SmallVector<Node *, 4> PendingRefSCCStack;
do {
assert(DFSStack.empty() &&
@@ -1205,18 +1241,18 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
RootN->DFSNumber = RootN->LowLink = 1;
int NextDFSNumber = 2;
- DFSStack.push_back({RootN, RootN->begin()});
+ DFSStack.push_back({RootN, (*RootN)->begin()});
do {
Node *N;
- edge_iterator I;
+ EdgeSequence::iterator I;
std::tie(N, I) = DFSStack.pop_back_val();
- auto E = N->end();
+ auto E = (*N)->end();
assert(N->DFSNumber != 0 && "We should always assign a DFS number "
"before processing a node.");
while (I != E) {
- Node &ChildN = I->getNode(*G);
+ Node &ChildN = I->getNode();
if (ChildN.DFSNumber == 0) {
// Mark that we should start at this child when next this node is the
// top of the stack. We don't start at the next child to ensure this
@@ -1226,8 +1262,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// Continue, resetting to the child node.
ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
N = &ChildN;
- I = ChildN.begin();
- E = ChildN.end();
+ I = ChildN->begin();
+ E = ChildN->end();
continue;
}
if (ChildN.DFSNumber == -1) {
@@ -1382,9 +1418,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
#endif
for (SCC *C : SCCs)
for (Node &N : *C) {
- for (Edge &E : N) {
- assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
- RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &ChildRC = *G->lookupRefSCC(E.getNode());
if (&ChildRC == this)
continue;
ChildRC.Parents.insert(this);
@@ -1408,9 +1443,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
for (RefSCC *ParentRC : OldParents)
for (SCC &ParentC : *ParentRC)
for (Node &ParentN : ParentC)
- for (Edge &E : ParentN) {
- assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
- RefSCC &RC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *ParentN) {
+ RefSCC &RC = *G->lookupRefSCC(E.getNode());
if (&RC != ParentRC)
RC.Parents.insert(ParentRC);
}
@@ -1448,8 +1482,10 @@ void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
return;
}
+#ifdef EXPENSIVE_CHECKS
assert(TargetRC.isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// The only change required is to add this RefSCC to the parent set of the
// target. This is a set and so idempotent if the edge already existed.
TargetRC.Parents.insert(this);
@@ -1461,25 +1497,29 @@ void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
// Check that the RefSCC is still valid when we finish.
auto ExitVerifier = make_scope_exit([this] { verify(); });
- // Check that we aren't breaking some invariants of the SCC graph.
+#ifdef EXPENSIVE_CHECKS
+ // Check that we aren't breaking some invariants of the SCC graph. Note that
+ // this is quadratic in the number of edges in the call graph!
SCC &SourceC = *G->lookupSCC(SourceN);
SCC &TargetC = *G->lookupSCC(TargetN);
if (&SourceC != &TargetC)
assert(SourceC.isAncestorOf(TargetC) &&
"Call edge is not trivial in the SCC graph!");
-#endif
+#endif // EXPENSIVE_CHECKS
+#endif // NDEBUG
+
// First insert it into the source or find the existing edge.
- auto InsertResult = SourceN.EdgeIndexMap.insert(
- {&TargetN.getFunction(), SourceN.Edges.size()});
+ auto InsertResult =
+ SourceN->EdgeIndexMap.insert({&TargetN, SourceN->Edges.size()});
if (!InsertResult.second) {
// Already an edge, just update it.
- Edge &E = SourceN.Edges[InsertResult.first->second];
+ Edge &E = SourceN->Edges[InsertResult.first->second];
if (E.isCall())
return; // Nothing to do!
E.setKind(Edge::Call);
} else {
// Create the new edge.
- SourceN.Edges.emplace_back(TargetN, Edge::Call);
+ SourceN->Edges.emplace_back(TargetN, Edge::Call);
}
// Now that we have the edge, handle the graph fallout.
@@ -1491,39 +1531,75 @@ void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
// Check that the RefSCC is still valid when we finish.
auto ExitVerifier = make_scope_exit([this] { verify(); });
+#ifdef EXPENSIVE_CHECKS
// Check that we aren't breaking some invariants of the RefSCC graph.
RefSCC &SourceRC = *G->lookupRefSCC(SourceN);
RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
if (&SourceRC != &TargetRC)
assert(SourceRC.isAncestorOf(TargetRC) &&
"Ref edge is not trivial in the RefSCC graph!");
-#endif
+#endif // EXPENSIVE_CHECKS
+#endif // NDEBUG
+
// First insert it into the source or find the existing edge.
- auto InsertResult = SourceN.EdgeIndexMap.insert(
- {&TargetN.getFunction(), SourceN.Edges.size()});
+ auto InsertResult =
+ SourceN->EdgeIndexMap.insert({&TargetN, SourceN->Edges.size()});
if (!InsertResult.second)
// Already an edge, we're done.
return;
// Create the new edge.
- SourceN.Edges.emplace_back(TargetN, Edge::Ref);
+ SourceN->Edges.emplace_back(TargetN, Edge::Ref);
// Now that we have the edge, handle the graph fallout.
handleTrivialEdgeInsertion(SourceN, TargetN);
}
-void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) {
- assert(SCCMap.empty() && DFSStack.empty() &&
+void LazyCallGraph::RefSCC::replaceNodeFunction(Node &N, Function &NewF) {
+ Function &OldF = N.getFunction();
+
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ assert(G->lookupRefSCC(N) == this &&
+ "Cannot replace the function of a node outside this RefSCC.");
+
+ assert(G->NodeMap.find(&NewF) == G->NodeMap.end() &&
+ "Must not have already walked the new function!'");
+
+ // It is important that this replacement not introduce graph changes so we
+ // insist that the caller has already removed every use of the original
+ // function and that all uses of the new function correspond to existing
+ // edges in the graph. The common and expected way to use this is when
+ // replacing the function itself in the IR without changing the call graph
+ // shape and just updating the analysis based on that.
+ assert(&OldF != &NewF && "Cannot replace a function with itself!");
+ assert(OldF.use_empty() &&
+ "Must have moved all uses from the old function to the new!");
+#endif
+
+ N.replaceFunction(NewF);
+
+ // Update various call graph maps.
+ G->NodeMap.erase(&OldF);
+ G->NodeMap[&NewF] = &N;
+}
+
+void LazyCallGraph::insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK) {
+ assert(SCCMap.empty() &&
"This method cannot be called after SCCs have been formed!");
- return SourceN.insertEdgeInternal(Target, EK);
+ return SourceN->insertEdgeInternal(TargetN, EK);
}
-void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) {
- assert(SCCMap.empty() && DFSStack.empty() &&
+void LazyCallGraph::removeEdge(Node &SourceN, Node &TargetN) {
+ assert(SCCMap.empty() &&
"This method cannot be called after SCCs have been formed!");
- return SourceN.removeEdgeInternal(Target);
+ bool Removed = SourceN->removeEdgeInternal(TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
}
void LazyCallGraph::removeDeadFunction(Function &F) {
@@ -1532,18 +1608,10 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
assert(F.use_empty() &&
"This routine should only be called on trivially dead functions!");
- auto EII = EntryIndexMap.find(&F);
- if (EII != EntryIndexMap.end()) {
- EntryEdges[EII->second] = Edge();
- EntryIndexMap.erase(EII);
- }
-
- // It's safe to just remove un-visited functions from the RefSCC entry list.
- // FIXME: This is a linear operation which could become hot and benefit from
- // an index map.
- auto RENI = find(RefSCCEntryNodes, &F);
- if (RENI != RefSCCEntryNodes.end())
- RefSCCEntryNodes.erase(RENI);
+ // We shouldn't remove library functions as they are never really dead while
+ // the call graph is in use -- every function definition refers to them.
+ assert(!isLibFunction(F) &&
+ "Must not remove lib functions from the call graph!");
auto NI = NodeMap.find(&F);
if (NI == NodeMap.end())
@@ -1553,22 +1621,16 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
Node &N = *NI->second;
NodeMap.erase(NI);
- if (SCCMap.empty() && DFSStack.empty()) {
- // No SCC walk has begun, so removing this is fine and there is nothing
+ // Remove this from the entry edges if present.
+ EntryEdges.removeEdgeInternal(N);
+
+ if (SCCMap.empty()) {
+ // No SCCs have been formed, so removing this is fine and there is nothing
// else necessary at this point but clearing out the node.
N.clear();
return;
}
- // Check that we aren't going to break the DFS walk.
- assert(all_of(DFSStack,
- [&N](const std::pair<Node *, edge_iterator> &Element) {
- return Element.first != &N;
- }) &&
- "Tried to remove a function currently in the DFS stack!");
- assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() &&
- "Tried to remove a function currently pending to add to a RefSCC!");
-
// Cannot remove a function which has yet to be visited in the DFS walk, so
// if we have a node at all then we must have an SCC and RefSCC.
auto CI = SCCMap.find(&N);
@@ -1583,13 +1645,19 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
// Validate these properties first.
assert(C.size() == 1 && "Dead functions must be in a singular SCC");
assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC");
- assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!");
+
+ // Clean up any remaining reference edges. Note that we walk an unordered set
+ // here but are just removing and so the order doesn't matter.
+ for (RefSCC &ParentRC : RC.parents())
+ for (SCC &ParentC : ParentRC)
+ for (Node &ParentN : ParentC)
+ if (ParentN)
+ ParentN->removeEdgeInternal(N);
// Now remove this RefSCC from any parents sets and the leaf list.
- for (Edge &E : N)
- if (Node *TargetN = E.getNode())
- if (RefSCC *TargetRC = lookupRefSCC(*TargetN))
- TargetRC->Parents.erase(&RC);
+ for (Edge &E : *N)
+ if (RefSCC *TargetRC = lookupRefSCC(E.getNode()))
+ TargetRC->Parents.erase(&RC);
// FIXME: This is a linear operation which could become hot and benefit from
// an index map.
auto LRI = find(LeafRefSCCs, &RC);
@@ -1622,15 +1690,14 @@ void LazyCallGraph::updateGraphPtrs() {
{
SmallVector<Node *, 16> Worklist;
for (Edge &E : EntryEdges)
- if (Node *EntryN = E.getNode())
- Worklist.push_back(EntryN);
+ Worklist.push_back(&E.getNode());
while (!Worklist.empty()) {
- Node *N = Worklist.pop_back_val();
- N->G = this;
- for (Edge &E : N->Edges)
- if (Node *TargetN = E.getNode())
- Worklist.push_back(TargetN);
+ Node &N = *Worklist.pop_back_val();
+ N.G = this;
+ if (N)
+ for (Edge &E : *N)
+ Worklist.push_back(&E.getNode());
}
}
@@ -1647,34 +1714,18 @@ void LazyCallGraph::updateGraphPtrs() {
}
}
-/// Build the internal SCCs for a RefSCC from a sequence of nodes.
-///
-/// Appends the SCCs to the provided vector and updates the map with their
-/// indices. Both the vector and map must be empty when passed into this
-/// routine.
-void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
- assert(RC.SCCs.empty() && "Already built SCCs!");
- assert(RC.SCCIndices.empty() && "Already mapped SCC indices!");
-
- for (Node *N : Nodes) {
- assert(N->LowLink >= (*Nodes.begin())->LowLink &&
- "We cannot have a low link in an SCC lower than its root on the "
- "stack!");
-
- // This node will go into the next RefSCC, clear out its DFS and low link
- // as we scan.
- N->DFSNumber = N->LowLink = 0;
- }
+template <typename RootsT, typename GetBeginT, typename GetEndT,
+ typename GetNodeT, typename FormSCCCallbackT>
+void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
+ GetEndT &&GetEnd, GetNodeT &&GetNode,
+ FormSCCCallbackT &&FormSCC) {
+ typedef decltype(GetBegin(std::declval<Node &>())) EdgeItT;
- // Each RefSCC contains a DAG of the call SCCs. To build these, we do
- // a direct walk of the call edges using Tarjan's algorithm. We reuse the
- // internal storage as we won't need it for the outer graph's DFS any longer.
-
- SmallVector<std::pair<Node *, call_edge_iterator>, 16> DFSStack;
+ SmallVector<std::pair<Node *, EdgeItT>, 16> DFSStack;
SmallVector<Node *, 16> PendingSCCStack;
// Scan down the stack and DFS across the call edges.
- for (Node *RootN : Nodes) {
+ for (Node *RootN : Roots) {
assert(DFSStack.empty() &&
"Cannot begin a new root with a non-empty DFS stack!");
assert(PendingSCCStack.empty() &&
@@ -1690,25 +1741,23 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
RootN->DFSNumber = RootN->LowLink = 1;
int NextDFSNumber = 2;
- DFSStack.push_back({RootN, RootN->call_begin()});
+ DFSStack.push_back({RootN, GetBegin(*RootN)});
do {
Node *N;
- call_edge_iterator I;
+ EdgeItT I;
std::tie(N, I) = DFSStack.pop_back_val();
- auto E = N->call_end();
+ auto E = GetEnd(*N);
while (I != E) {
- Node &ChildN = *I->getNode();
+ Node &ChildN = GetNode(I);
if (ChildN.DFSNumber == 0) {
// We haven't yet visited this child, so descend, pushing the current
// node onto the stack.
DFSStack.push_back({N, I});
- assert(!lookupSCC(ChildN) &&
- "Found a node with 0 DFS number but already in an SCC!");
ChildN.DFSNumber = ChildN.LowLink = NextDFSNumber++;
N = &ChildN;
- I = N->call_begin();
- E = N->call_end();
+ I = GetBegin(*N);
+ E = GetEnd(*N);
continue;
}
@@ -1750,20 +1799,93 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
}));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
- RC.SCCs.push_back(createSCC(RC, SCCNodes));
- for (Node &N : *RC.SCCs.back()) {
- N.DFSNumber = N.LowLink = -1;
- SCCMap[&N] = RC.SCCs.back();
- }
+ FormSCC(SCCNodes);
PendingSCCStack.erase(SCCNodes.end().base(), PendingSCCStack.end());
} while (!DFSStack.empty());
}
+}
+
+/// Build the internal SCCs for a RefSCC from a sequence of nodes.
+///
+/// Appends the SCCs to the provided vector and updates the map with their
+/// indices. Both the vector and map must be empty when passed into this
+/// routine.
+void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
+ assert(RC.SCCs.empty() && "Already built SCCs!");
+ assert(RC.SCCIndices.empty() && "Already mapped SCC indices!");
+
+ for (Node *N : Nodes) {
+ assert(N->LowLink >= (*Nodes.begin())->LowLink &&
+ "We cannot have a low link in an SCC lower than its root on the "
+ "stack!");
+
+ // This node will go into the next RefSCC, clear out its DFS and low link
+ // as we scan.
+ N->DFSNumber = N->LowLink = 0;
+ }
+
+ // Each RefSCC contains a DAG of the call SCCs. To build these, we do
+ // a direct walk of the call edges using Tarjan's algorithm. We reuse the
+ // internal storage as we won't need it for the outer graph's DFS any longer.
+ buildGenericSCCs(
+ Nodes, [](Node &N) { return N->call_begin(); },
+ [](Node &N) { return N->call_end(); },
+ [](EdgeSequence::call_iterator I) -> Node & { return I->getNode(); },
+ [this, &RC](node_stack_range Nodes) {
+ RC.SCCs.push_back(createSCC(RC, Nodes));
+ for (Node &N : *RC.SCCs.back()) {
+ N.DFSNumber = N.LowLink = -1;
+ SCCMap[&N] = RC.SCCs.back();
+ }
+ });
// Wire up the SCC indices.
for (int i = 0, Size = RC.SCCs.size(); i < Size; ++i)
RC.SCCIndices[RC.SCCs[i]] = i;
}
+void LazyCallGraph::buildRefSCCs() {
+ if (EntryEdges.empty() || !PostOrderRefSCCs.empty())
+ // RefSCCs are either non-existent or already built!
+ return;
+
+ assert(RefSCCIndices.empty() && "Already mapped RefSCC indices!");
+
+ SmallVector<Node *, 16> Roots;
+ for (Edge &E : *this)
+ Roots.push_back(&E.getNode());
+
+ // The roots will be popped of a stack, so use reverse to get a less
+ // surprising order. This doesn't change any of the semantics anywhere.
+ std::reverse(Roots.begin(), Roots.end());
+
+ buildGenericSCCs(
+ Roots,
+ [](Node &N) {
+ // We need to populate each node as we begin to walk its edges.
+ N.populate();
+ return N->begin();
+ },
+ [](Node &N) { return N->end(); },
+ [](EdgeSequence::iterator I) -> Node & { return I->getNode(); },
+ [this](node_stack_range Nodes) {
+ RefSCC *NewRC = createRefSCC(*this);
+ buildSCCs(*NewRC, Nodes);
+ connectRefSCC(*NewRC);
+
+ // Push the new node into the postorder list and remember its position
+ // in the index map.
+ bool Inserted =
+ RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot already have this RefSCC in the index map!");
+ PostOrderRefSCCs.push_back(NewRC);
+#ifndef NDEBUG
+ NewRC->verify();
+#endif
+ });
+}
+
// FIXME: We should move callers of this to embed the parent linking and leaf
// tracking into their DFS in order to remove a full walk of all edges.
void LazyCallGraph::connectRefSCC(RefSCC &RC) {
@@ -1773,10 +1895,8 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
bool IsLeaf = true;
for (SCC &C : RC)
for (Node &N : C)
- for (Edge &E : N) {
- assert(E.getNode() &&
- "Cannot have a missing node in a visited part of the graph!");
- RefSCC &ChildRC = *lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &ChildRC = *lookupRefSCC(E.getNode());
if (&ChildRC == &RC)
continue;
ChildRC.Parents.insert(&RC);
@@ -1788,113 +1908,13 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
LeafRefSCCs.push_back(&RC);
}
-bool LazyCallGraph::buildNextRefSCCInPostOrder() {
- if (DFSStack.empty()) {
- Node *N;
- do {
- // If we've handled all candidate entry nodes to the SCC forest, we're
- // done.
- if (RefSCCEntryNodes.empty())
- return false;
-
- N = &get(*RefSCCEntryNodes.pop_back_val());
- } while (N->DFSNumber != 0);
-
- // Found a new root, begin the DFS here.
- N->LowLink = N->DFSNumber = 1;
- NextDFSNumber = 2;
- DFSStack.push_back({N, N->begin()});
- }
-
- for (;;) {
- Node *N;
- edge_iterator I;
- std::tie(N, I) = DFSStack.pop_back_val();
-
- assert(N->DFSNumber > 0 && "We should always assign a DFS number "
- "before placing a node onto the stack.");
-
- auto E = N->end();
- while (I != E) {
- Node &ChildN = I->getNode(*this);
- if (ChildN.DFSNumber == 0) {
- // We haven't yet visited this child, so descend, pushing the current
- // node onto the stack.
- DFSStack.push_back({N, N->begin()});
-
- assert(!SCCMap.count(&ChildN) &&
- "Found a node with 0 DFS number but already in an SCC!");
- ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
- N = &ChildN;
- I = N->begin();
- E = N->end();
- continue;
- }
-
- // If the child has already been added to some child component, it
- // couldn't impact the low-link of this parent because it isn't
- // connected, and thus its low-link isn't relevant so skip it.
- if (ChildN.DFSNumber == -1) {
- ++I;
- continue;
- }
-
- // Track the lowest linked child as the lowest link for this node.
- assert(ChildN.LowLink > 0 && "Must have a positive low-link number!");
- if (ChildN.LowLink < N->LowLink)
- N->LowLink = ChildN.LowLink;
-
- // Move to the next edge.
- ++I;
- }
-
- // We've finished processing N and its descendents, put it on our pending
- // SCC stack to eventually get merged into an SCC of nodes.
- PendingRefSCCStack.push_back(N);
-
- // If this node is linked to some lower entry, continue walking up the
- // stack.
- if (N->LowLink != N->DFSNumber) {
- assert(!DFSStack.empty() &&
- "We never found a viable root for an SCC to pop off!");
- continue;
- }
-
- // Otherwise, form a new RefSCC from the top of the pending node stack.
- int RootDFSNumber = N->DFSNumber;
- // Find the range of the node stack by walking down until we pass the
- // root DFS number.
- auto RefSCCNodes = node_stack_range(
- PendingRefSCCStack.rbegin(),
- find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
- // Form a new RefSCC out of these nodes and then clear them off our pending
- // stack.
- RefSCC *NewRC = createRefSCC(*this);
- buildSCCs(*NewRC, RefSCCNodes);
- connectRefSCC(*NewRC);
- PendingRefSCCStack.erase(RefSCCNodes.end().base(),
- PendingRefSCCStack.end());
-
- // Push the new node into the postorder list and return true indicating we
- // successfully grew the postorder sequence by one.
- bool Inserted =
- RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
- (void)Inserted;
- assert(Inserted && "Cannot already have this RefSCC in the index map!");
- PostOrderRefSCCs.push_back(NewRC);
- return true;
- }
-}
-
AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) {
OS << " Edges in function: " << N.getFunction().getName() << "\n";
- for (const LazyCallGraph::Edge &E : N)
+ for (LazyCallGraph::Edge &E : N.populate())
OS << " " << (E.isCall() ? "call" : "ref ") << " -> "
<< E.getFunction().getName() << "\n";
@@ -1929,6 +1949,7 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M,
for (Function &F : M)
printNode(OS, G.get(F));
+ G.buildRefSCCs();
for (LazyCallGraph::RefSCC &C : G.postorder_ref_sccs())
printRefSCC(OS, C);
@@ -1941,7 +1962,7 @@ LazyCallGraphDOTPrinterPass::LazyCallGraphDOTPrinterPass(raw_ostream &OS)
static void printNodeDOT(raw_ostream &OS, LazyCallGraph::Node &N) {
std::string Name = "\"" + DOT::EscapeString(N.getFunction().getName()) + "\"";
- for (const LazyCallGraph::Edge &E : N) {
+ for (LazyCallGraph::Edge &E : N.populate()) {
OS << " " << Name << " -> \""
<< DOT::EscapeString(E.getFunction().getName()) << "\"";
if (!E.isCall()) // It is a ref edge.
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index d442310..102081e 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <stack>
@@ -39,6 +41,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "lazy-value-info"
+// This is the number of worklist items we will process to try to discover an
+// answer for a given value.
+static const unsigned MaxProcessedPerValue = 500;
+
char LazyValueInfoWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -136,7 +142,7 @@ public:
return Val;
}
- ConstantRange getConstantRange() const {
+ const ConstantRange &getConstantRange() const {
assert(isConstantRange() &&
"Cannot get the constant-range of a non-constant-range!");
return Range;
@@ -244,7 +250,7 @@ public:
if (NewR.isFullSet())
markOverdefined();
else
- markConstantRange(NewR);
+ markConstantRange(std::move(NewR));
}
};
@@ -296,7 +302,7 @@ static bool hasSingleValue(const LVILatticeVal &Val) {
/// contradictory. If this happens, we return some valid lattice value so as
/// not confuse the rest of LVI. Ideally, we'd always return Undefined, but
/// we do not make this guarantee. TODO: This would be a useful enhancement.
-static LVILatticeVal intersect(LVILatticeVal A, LVILatticeVal B) {
+static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) {
// Undefined is the strongest state. It means the value is known to be along
// an unreachable path.
if (A.isUndefined())
@@ -366,22 +372,22 @@ namespace {
struct ValueCacheEntryTy {
ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {}
LVIValueHandle Handle;
- SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
+ SmallDenseMap<PoisoningVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
};
- /// This is all of the cached information for all values,
- /// mapped from Value* to key information.
- DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
-
/// This tracks, on a per-block basis, the set of values that are
/// over-defined at the end of that block.
- typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>>
+ typedef DenseMap<PoisoningVH<BasicBlock>, SmallPtrSet<Value *, 4>>
OverDefinedCacheTy;
- OverDefinedCacheTy OverDefinedCache;
-
/// Keep track of all blocks that we have ever seen, so we
/// don't spend time removing unused blocks from our caches.
- DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+ DenseSet<PoisoningVH<BasicBlock> > SeenBlocks;
+
+ /// This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
+ OverDefinedCacheTy OverDefinedCache;
+
public:
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
@@ -459,15 +465,15 @@ namespace {
}
void LazyValueInfoCache::eraseValue(Value *V) {
- SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
- for (auto &I : OverDefinedCache) {
- SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) {
+ // Copy and increment the iterator immediately so we can erase behind
+ // ourselves.
+ auto Iter = I++;
+ SmallPtrSetImpl<Value *> &ValueSet = Iter->second;
ValueSet.erase(V);
if (ValueSet.empty())
- ToErase.push_back(I.first);
+ OverDefinedCache.erase(Iter);
}
- for (auto &BB : ToErase)
- OverDefinedCache.erase(BB);
ValueCache.erase(V);
}
@@ -480,7 +486,7 @@ void LVIValueHandle::deleted() {
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
// Shortcut if we have never seen this block.
- DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ DenseSet<PoisoningVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
if (I == SeenBlocks.end())
return;
SeenBlocks.erase(I);
@@ -551,6 +557,30 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
}
}
+
+namespace {
+/// An assembly annotator class to print LazyValueCache information in
+/// comments.
+class LazyValueInfoImpl;
+class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+ LazyValueInfoImpl *LVIImpl;
+ // While analyzing which blocks we can solve values for, we need the dominator
+ // information. Since this is an optional parameter in LVI, we require this
+ // DomTreeAnalysis pass in the printer pass, and pass the dominator
+ // tree to the LazyValueInfoAnnotatedWriter.
+ DominatorTree &DT;
+
+public:
+ LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree)
+ : LVIImpl(L), DT(DTree) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS);
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS);
+};
+}
namespace {
// The actual implementation of the lazy analysis and update. Note that the
// inheritance from LazyValueInfoCache is intended to be temporary while
@@ -563,7 +593,7 @@ namespace {
/// This stack holds the state of the value solver during a query.
/// It basically emulates the callstack of the naive
/// recursive value lookup process.
- std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> BlockValueStack;
/// Keeps track of which block-value pairs are in BlockValueStack.
DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet;
@@ -576,7 +606,7 @@ namespace {
DEBUG(dbgs() << "PUSH: " << *BV.second << " in " << BV.first->getName()
<< "\n");
- BlockValueStack.push(BV);
+ BlockValueStack.push_back(BV);
return true;
}
@@ -598,13 +628,13 @@ namespace {
bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
BasicBlock *BB);
- bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI,
+ bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
- bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
+ bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI,
BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
LVILatticeVal &BBLV,
- Instruction *BBI);
+ Instruction *BBI);
void solve();
@@ -629,6 +659,12 @@ namespace {
TheCache.clear();
}
+ /// Printing the LazyValueInfo Analysis.
+ void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
+ LazyValueInfoAnnotatedWriter Writer(this, DTree);
+ F.print(OS, &Writer);
+ }
+
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB) {
@@ -645,25 +681,52 @@ namespace {
};
} // end anonymous namespace
+
void LazyValueInfoImpl::solve() {
+ SmallVector<std::pair<BasicBlock *, Value *>, 8> StartingStack(
+ BlockValueStack.begin(), BlockValueStack.end());
+
+ unsigned processedCount = 0;
while (!BlockValueStack.empty()) {
- std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ processedCount++;
+ // Abort if we have to process too many values to get a result for this one.
+ // Because of the design of the overdefined cache currently being per-block
+ // to avoid naming-related issues (IE it wants to try to give different
+ // results for the same name in different blocks), overdefined results don't
+ // get cached globally, which in turn means we will often try to rediscover
+ // the same overdefined result again and again. Once something like
+ // PredicateInfo is used in LVI or CVP, we should be able to make the
+ // overdefined cache global, and remove this throttle.
+ if (processedCount > MaxProcessedPerValue) {
+ DEBUG(dbgs() << "Giving up on stack because we are getting too deep\n");
+ // Fill in the original values
+ while (!StartingStack.empty()) {
+ std::pair<BasicBlock *, Value *> &e = StartingStack.back();
+ TheCache.insertResult(e.second, e.first,
+ LVILatticeVal::getOverdefined());
+ StartingStack.pop_back();
+ }
+ BlockValueSet.clear();
+ BlockValueStack.clear();
+ return;
+ }
+ std::pair<BasicBlock *, Value *> e = BlockValueStack.back();
assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
- assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
+ assert(BlockValueStack.back() == e && "Nothing should have been pushed!");
assert(TheCache.hasCachedValueInfo(e.second, e.first) &&
"Result should be in cache!");
DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName()
<< " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
- BlockValueStack.pop();
+ BlockValueStack.pop_back();
BlockValueSet.erase(e);
} else {
// More work needs to be done before revisiting.
- assert(BlockValueStack.top() != e && "Stack should have been pushed!");
+ assert(BlockValueStack.back() != e && "Stack should have been pushed!");
}
}
}
@@ -743,7 +806,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
// that for all other pointer typed values, we terminate the search at the
// definition. We could easily extend this to look through geps, bitcasts,
// and the like to prove non-nullness, but it's not clear that's worth it
- // compile time wise. The context-insensative value walk done inside
+ // compile time wise. The context-insensitive value walk done inside
// isKnownNonNull gets most of the profitable cases at much less expense.
// This does mean that we have a sensativity to where the defining
// instruction is placed, even if it could legally be hoisted much higher.
@@ -754,12 +817,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
return true;
}
if (BBI->getType()->isIntegerTy()) {
- if (isa<CastInst>(BBI))
- return solveBlockValueCast(Res, BBI, BB);
-
+ if (auto *CI = dyn_cast<CastInst>(BBI))
+ return solveBlockValueCast(Res, CI, BB);
+
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
if (BO && isa<ConstantInt>(BO->getOperand(1)))
- return solveBlockValueBinaryOp(Res, BBI, BB);
+ return solveBlockValueBinaryOp(Res, BO, BB);
}
DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -825,7 +888,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
// value is overdefined.
if (BB == &BB->getParent()->getEntryBlock()) {
assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
- // Bofore giving up, see if we can prove the pointer non-null local to
+ // Before giving up, see if we can prove the pointer non-null local to
// this particular block.
if (Val->getType()->isPointerTy() &&
(isKnownNonNull(Val) || isObjectDereferencedInBlock(Val, BB))) {
@@ -839,13 +902,19 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
}
// Loop over all of our predecessors, merging what we know from them into
- // result.
- bool EdgesMissing = false;
+ // result. If we encounter an unexplored predecessor, we eagerly explore it
+ // in a depth first manner. In practice, this has the effect of discovering
+ // paths we can't analyze eagerly without spending compile times analyzing
+ // other paths. This heuristic benefits from the fact that predecessors are
+ // frequently arranged such that dominating ones come first and we quickly
+ // find a path to function entry. TODO: We should consider explicitly
+ // canonicalizing to make this true rather than relying on this happy
+ // accident.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
LVILatticeVal EdgeResult;
- EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
- if (EdgesMissing)
- continue;
+ if (!getEdgeValue(Val, *PI, BB, EdgeResult))
+ // Explore that input, then return here
+ return false;
Result.mergeIn(EdgeResult, DL);
@@ -866,8 +935,6 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
return true;
}
}
- if (EdgesMissing)
- return false;
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined());
@@ -880,8 +947,8 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
LVILatticeVal Result; // Start Undefined.
// Loop over all of our predecessors, merging what we know from them into
- // result.
- bool EdgesMissing = false;
+ // result. See the comment about the chosen traversal order in
+ // solveBlockValueNonLocal; the same reasoning applies here.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PhiBB = PN->getIncomingBlock(i);
Value *PhiVal = PN->getIncomingValue(i);
@@ -889,9 +956,9 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
// Note that we can provide PN as the context value to getEdgeValue, even
// though the results will be cached, because PN is the value being used as
// the cache key in the caller.
- EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN);
- if (EdgesMissing)
- continue;
+ if (!getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN))
+ // Explore that input, then return here
+ return false;
Result.mergeIn(EdgeResult, DL);
@@ -905,8 +972,6 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
}
- if (EdgesMissing)
- return false;
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined() && "Possible PHI in entry block?");
@@ -982,8 +1047,8 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
}
if (TrueVal.isConstantRange() && FalseVal.isConstantRange()) {
- ConstantRange TrueCR = TrueVal.getConstantRange();
- ConstantRange FalseCR = FalseVal.getConstantRange();
+ const ConstantRange &TrueCR = TrueVal.getConstantRange();
+ const ConstantRange &FalseCR = FalseVal.getConstantRange();
Value *LHS = nullptr;
Value *RHS = nullptr;
SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS);
@@ -1071,9 +1136,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
}
bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
- Instruction *BBI,
- BasicBlock *BB) {
- if (!BBI->getOperand(0)->getType()->isSized()) {
+ CastInst *CI,
+ BasicBlock *BB) {
+ if (!CI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
BBLV = LVILatticeVal::getOverdefined();
@@ -1083,7 +1148,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Filter out casts we don't know how to reason about before attempting to
// recurse on our operand. This can cut a long search short if we know we're
// not going to be able to get any useful information anways.
- switch (BBI->getOpcode()) {
+ switch (CI->getOpcode()) {
case Instruction::Trunc:
case Instruction::SExt:
case Instruction::ZExt:
@@ -1100,44 +1165,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- if (!hasBlockValue(BBI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ if (!hasBlockValue(CI->getOperand(0), BB))
+ if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
// More work to do before applying this transfer rule.
return false;
const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+ DL.getTypeSizeInBits(CI->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BBI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
- BBI);
+ if (hasBlockValue(CI->getOperand(0), BB)) {
+ LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
+ CI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
- const unsigned ResultBitWidth =
- cast<IntegerType>(BBI->getType())->getBitWidth();
+ const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
// more definitions.
- auto CastOp = (Instruction::CastOps) BBI->getOpcode();
- BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
+ BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(),
+ ResultBitWidth));
return true;
}
bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
- Instruction *BBI,
+ BinaryOperator *BO,
BasicBlock *BB) {
- assert(BBI->getOperand(0)->getType()->isSized() &&
+ assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
// Filter out operators we don't know how to reason about before attempting to
// recurse on our operand(s). This can cut a long search short if we know
- // we're not going to be able to get any useful information anways.
- switch (BBI->getOpcode()) {
+ // we're not going to be able to get any useful information anyways.
+ switch (BO->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1159,29 +1223,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Figure out the range of the LHS. If that fails, use a conservative range,
// but apply the transfer rule anyways. This lets us pick up facts from
// expressions like "and i32 (call i32 @foo()), 32"
- if (!hasBlockValue(BBI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ if (!hasBlockValue(BO->getOperand(0), BB))
+ if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
// More work to do before applying this transfer rule.
return false;
const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+ DL.getTypeSizeInBits(BO->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BBI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
- BBI);
+ if (hasBlockValue(BO->getOperand(0), BB)) {
+ LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
+ BO);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
- ConstantInt *RHS = cast<ConstantInt>(BBI->getOperand(1));
+ ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
ConstantRange RHSRange = ConstantRange(RHS->getValue());
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
// more definitions.
- auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+ Instruction::BinaryOps BinOp = BO->getOpcode();
BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
@@ -1260,12 +1324,12 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
return getValueFromICmpCondition(Val, ICI, isTrueDest);
// Handle conditions in the form of (cond1 && cond2), we know that on the
- // true dest path both of the conditions hold.
- if (!isTrueDest)
- return LVILatticeVal::getOverdefined();
-
+ // true dest path both of the conditions hold. Similarly for conditions of
+ // the form (cond1 || cond2), we know that on the false dest path neither
+ // condition holds.
BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
- if (!BO || BO->getOpcode() != BinaryOperator::And)
+ if (!BO || (isTrueDest && BO->getOpcode() != BinaryOperator::And) ||
+ (!isTrueDest && BO->getOpcode() != BinaryOperator::Or))
return LVILatticeVal::getOverdefined();
auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
@@ -1333,14 +1397,14 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
unsigned BitWidth = Val->getType()->getIntegerBitWidth();
ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
- for (SwitchInst::CaseIt i : SI->cases()) {
- ConstantRange EdgeVal(i.getCaseValue()->getValue());
+ for (auto Case : SI->cases()) {
+ ConstantRange EdgeVal(Case.getCaseValue()->getValue());
if (DefaultCase) {
// It is possible that the default destination is the destination of
// some cases. There is no need to perform difference for those cases.
- if (i.getCaseSuccessor() != BBTo)
+ if (Case.getCaseSuccessor() != BBTo)
EdgesVals = EdgesVals.difference(EdgeVal);
- } else if (i.getCaseSuccessor() == BBTo)
+ } else if (Case.getCaseSuccessor() == BBTo)
EdgesVals = EdgesVals.unionWith(EdgeVal);
}
Result = LVILatticeVal::getRange(std::move(EdgesVals));
@@ -1352,8 +1416,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo, LVILatticeVal &Result,
- Instruction *CxtI) {
+ BasicBlock *BBTo, LVILatticeVal &Result,
+ Instruction *CxtI) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val)) {
Result = LVILatticeVal::get(VC);
@@ -1503,6 +1567,18 @@ void LazyValueInfo::releaseMemory() {
}
}
+bool LazyValueInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // We need to invalidate if we have either failed to preserve this analyses
+ // result directly or if any of its dependencies have been invalidated.
+ auto PAC = PA.getChecker<LazyValueAnalysis>();
+ if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
+ (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)))
+ return true;
+
+ return false;
+}
+
void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); }
LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
@@ -1510,7 +1586,7 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM)
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- return LazyValueInfo(&AC, &TLI, DT);
+ return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI, DT);
}
/// Returns true if we can statically tell that this value will never be a
@@ -1540,7 +1616,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
if (Result.isConstant())
return Result.getConstant();
if (Result.isConstantRange()) {
- ConstantRange CR = Result.getConstantRange();
+ const ConstantRange &CR = Result.getConstantRange();
if (const APInt *SingleVal = CR.getSingleElement())
return ConstantInt::get(V->getContext(), *SingleVal);
}
@@ -1577,71 +1653,90 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
if (Result.isConstant())
return Result.getConstant();
if (Result.isConstantRange()) {
- ConstantRange CR = Result.getConstantRange();
+ const ConstantRange &CR = Result.getConstantRange();
if (const APInt *SingleVal = CR.getSingleElement())
return ConstantInt::get(V->getContext(), *SingleVal);
}
return nullptr;
}
+ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
+ BasicBlock *FromBB,
+ BasicBlock *ToBB,
+ Instruction *CxtI) {
+ unsigned Width = V->getType()->getIntegerBitWidth();
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
+ LVILatticeVal Result =
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
+ if (Result.isUndefined())
+ return ConstantRange(Width, /*isFullSet=*/false);
+ if (Result.isConstantRange())
+ return Result.getConstantRange();
+ // We represent ConstantInt constants as constant ranges but other kinds
+ // of integer constants, i.e. ConstantExpr will be tagged as constants
+ assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
+ "ConstantInt value must be represented as constantrange");
+ return ConstantRange(Width, /*isFullSet=*/true);
+}
+
static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
- LVILatticeVal &Result,
+ const LVILatticeVal &Val,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
- if (Result.isConstant()) {
- Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
- TLI);
+ if (Val.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
return LazyValueInfo::Unknown;
}
- if (Result.isConstantRange()) {
+ if (Val.isConstantRange()) {
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (!CI) return LazyValueInfo::Unknown;
- ConstantRange CR = Result.getConstantRange();
+ const ConstantRange &CR = Val.getConstantRange();
if (Pred == ICmpInst::ICMP_EQ) {
if (!CR.contains(CI->getValue()))
return LazyValueInfo::False;
- if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ if (CR.isSingleElement())
return LazyValueInfo::True;
} else if (Pred == ICmpInst::ICMP_NE) {
if (!CR.contains(CI->getValue()))
return LazyValueInfo::True;
- if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ if (CR.isSingleElement())
+ return LazyValueInfo::False;
+ } else {
+ // Handle more complex predicates.
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
+ (ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
+ return LazyValueInfo::True;
+ if (TrueValues.inverse().contains(CR))
return LazyValueInfo::False;
}
-
- // Handle more complex predicates.
- ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
- (ICmpInst::Predicate)Pred, CI->getValue());
- if (TrueValues.contains(CR))
- return LazyValueInfo::True;
- if (TrueValues.inverse().contains(CR))
- return LazyValueInfo::False;
return LazyValueInfo::Unknown;
}
- if (Result.isNotConstant()) {
+ if (Val.isNotConstant()) {
// If this is an equality comparison, we can try to fold it knowing that
// "V != C1".
if (Pred == ICmpInst::ICMP_EQ) {
// !C1 == C -> false iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, DL,
+ Val.getNotConstant(), C, DL,
TLI);
if (Res->isNullValue())
return LazyValueInfo::False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, DL,
+ Val.getNotConstant(), C, DL,
TLI);
if (Res->isNullValue())
return LazyValueInfo::True;
@@ -1780,3 +1875,97 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) {
getImpl(PImpl, AC, &DL, DT).eraseBlock(BB);
}
}
+
+
+void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
+ if (PImpl) {
+ getImpl(PImpl, AC, DL, DT).printLVI(F, DTree, OS);
+ }
+}
+
+// Print the LVI for the function arguments at the start of each basic block.
+void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot(
+ const BasicBlock *BB, formatted_raw_ostream &OS) {
+ // Find if there are latticevalues defined for arguments of the function.
+ auto *F = BB->getParent();
+ for (auto &Arg : F->args()) {
+ LVILatticeVal Result = LVIImpl->getValueInBlock(
+ const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB));
+ if (Result.isUndefined())
+ continue;
+ OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n";
+ }
+}
+
+// This function prints the LVI analysis for the instruction I at the beginning
+// of various basic blocks. It relies on calculated values that are stored in
+// the LazyValueInfoCache, and in the absence of cached values, recalculte the
+// LazyValueInfo for `I`, and print that info.
+void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
+ const Instruction *I, formatted_raw_ostream &OS) {
+
+ auto *ParentBB = I->getParent();
+ SmallPtrSet<const BasicBlock*, 16> BlocksContainingLVI;
+ // We can generate (solve) LVI values only for blocks that are dominated by
+ // the I's parent. However, to avoid generating LVI for all dominating blocks,
+ // that contain redundant/uninteresting information, we print LVI for
+ // blocks that may use this LVI information (such as immediate successor
+ // blocks, and blocks that contain uses of `I`).
+ auto printResult = [&](const BasicBlock *BB) {
+ if (!BlocksContainingLVI.insert(BB).second)
+ return;
+ LVILatticeVal Result = LVIImpl->getValueInBlock(
+ const_cast<Instruction *>(I), const_cast<BasicBlock *>(BB));
+ OS << "; LatticeVal for: '" << *I << "' in BB: '";
+ BB->printAsOperand(OS, false);
+ OS << "' is: " << Result << "\n";
+ };
+
+ printResult(ParentBB);
+ // Print the LVI analysis results for the the immediate successor blocks, that
+ // are dominated by `ParentBB`.
+ for (auto *BBSucc : successors(ParentBB))
+ if (DT.dominates(ParentBB, BBSucc))
+ printResult(BBSucc);
+
+ // Print LVI in blocks where `I` is used.
+ for (auto *U : I->users())
+ if (auto *UseI = dyn_cast<Instruction>(U))
+ if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent()))
+ printResult(UseI->getParent());
+
+}
+
+namespace {
+// Printer class for LazyValueInfo results.
+class LazyValueInfoPrinter : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ LazyValueInfoPrinter() : FunctionPass(ID) {
+ initializeLazyValueInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+
+ // Get the mandatory dominator tree analysis and pass this in to the
+ // LVIPrinter. We cannot rely on the LVI's DT, since it's optional.
+ bool runOnFunction(Function &F) override {
+ dbgs() << "LVI for function '" << F.getName() << "':\n";
+ auto &LVI = getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto &DTree = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LVI.printLVI(F, DTree, dbgs());
+ return false;
+ }
+};
+}
+
+char LazyValueInfoPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(LazyValueInfoPrinter, "print-lazy-value-info",
+ "Lazy Value Info Printer Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LazyValueInfoPrinter, "print-lazy-value-info",
+ "Lazy Value Info Printer Pass", false, false)
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 2ca46b1..ada600a 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -58,18 +58,19 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -404,7 +405,7 @@ void Lint::visitMemoryReference(Instruction &I,
Assert(!isa<UndefValue>(UnderlyingObject),
"Undefined behavior: Undef pointer dereference", &I);
Assert(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
"Unusual: All-ones pointer dereference", &I);
Assert(!isa<ConstantInt>(UnderlyingObject) ||
!cast<ConstantInt>(UnderlyingObject)->isOne(),
@@ -533,11 +534,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
VectorType *VecTy = dyn_cast<VectorType>(V->getType());
if (!VecTy) {
- unsigned BitWidth = V->getType()->getIntegerBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
- dyn_cast<Instruction>(V), DT);
- return KnownZero.isAllOnesValue();
+ KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
+ return Known.isZero();
}
// Per-component check doesn't work with zeroinitializer
@@ -550,15 +548,13 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
// For a vector, KnownZero will only be true if all values are zero, so check
// this per component
- unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
Constant *Elem = C->getAggregateElement(I);
if (isa<UndefValue>(Elem))
return true;
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Elem, KnownZero, KnownOne, DL);
- if (KnownZero.isAllOnesValue())
+ KnownBits Known = computeKnownBits(Elem, DL);
+ if (Known.isZero())
return true;
}
@@ -699,7 +695,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+ if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
return findValueImpl(W, OffsetOk, Visited);
} else if (auto *C = dyn_cast<Constant>(V)) {
if (Value *W = ConstantFoldConstant(C, *DL, TLI))
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index e46541e..591b0fc 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -117,6 +117,16 @@ static bool isDereferenceableAndAlignedPointer(
}
bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+ const APInt &Size,
+ const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT) {
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT,
+ Visited);
+}
+
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
@@ -312,21 +322,26 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA, bool *IsLoadCSE) {
- if (MaxInstsToScan == 0)
- MaxInstsToScan = ~0U;
-
- Value *Ptr = Load->getPointerOperand();
- Type *AccessTy = Load->getType();
-
- // We can never remove a volatile load
- if (Load->isVolatile())
- return nullptr;
-
- // Anything stronger than unordered is currently unimplemented.
+ AliasAnalysis *AA, bool *IsLoad,
+ unsigned *NumScanedInst) {
+ // Don't CSE load that is volatile or anything stronger than unordered.
if (!Load->isUnordered())
return nullptr;
+ return FindAvailablePtrLoadStore(
+ Load->getPointerOperand(), Load->getType(), Load->isAtomic(), ScanBB,
+ ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst);
+}
+
+Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
+ bool AtLeastAtomic, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA, bool *IsLoadCSE,
+ unsigned *NumScanedInst) {
+ if (MaxInstsToScan == 0)
+ MaxInstsToScan = ~0U;
+
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
@@ -344,6 +359,9 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
// Restore ScanFrom to expected value in case next test succeeds
ScanFrom++;
+ if (NumScanedInst)
+ ++(*NumScanedInst);
+
// Don't scan huge blocks.
if (MaxInstsToScan-- == 0)
return nullptr;
@@ -359,7 +377,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
// We can value forward from an atomic to a non-atomic, but not the
// other way around.
- if (LI->isAtomic() < Load->isAtomic())
+ if (LI->isAtomic() < AtLeastAtomic)
return nullptr;
if (IsLoadCSE)
@@ -378,7 +396,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
// We can value forward from an atomic to a non-atomic, but not the
// other way around.
- if (SI->isAtomic() < Load->isAtomic())
+ if (SI->isAtomic() < AtLeastAtomic)
return nullptr;
if (IsLoadCSE)
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bf80072..4ba1258 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -135,21 +135,6 @@ bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
-void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
- const Loop *TheLoop, const char *PassName,
- OptimizationRemarkEmitter &ORE) {
- DebugLoc DL = TheLoop->getStartLoc();
- const Value *V = TheLoop->getHeader();
- if (const Instruction *I = Message.getInstr()) {
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- V = I->getParent();
- }
- ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
-}
-
Value *llvm::stripIntegerCast(Value *V) {
if (auto *CI = dyn_cast<CastInst>(V))
if (CI->getOperand(0)->getType()->isIntegerTy())
@@ -172,11 +157,6 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
// Strip casts.
StrideVal = stripIntegerCast(StrideVal);
- // Replace symbolic stride by one.
- Value *One = ConstantInt::get(StrideVal->getType(), 1);
- ValueToValueMap RewriteMap;
- RewriteMap[StrideVal] = One;
-
ScalarEvolution *SE = PSE.getSE();
const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
const auto *CT =
@@ -518,7 +498,7 @@ class AccessAnalysis {
public:
/// \brief Read or write access location.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+ typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
@@ -570,7 +550,7 @@ public:
DepChecker.clearDependences();
}
- MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+ MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
@@ -584,8 +564,8 @@ private:
const DataLayout &DL;
- /// Set of accesses that need a further dependence check.
- MemAccessInfoSet CheckDeps;
+ /// List of accesses that need a further dependence check.
+ MemAccessInfoList CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
@@ -842,7 +822,7 @@ void AccessAnalysis::processMemAccesses() {
// there is no other write to the ptr - this is an optimization to
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
- CheckDeps.insert(Access);
+ CheckDeps.push_back(Access);
IsRTCheckAnalysisNeeded = true;
}
@@ -1205,6 +1185,73 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
+/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// memory accesses, that have the same stride whose absolute value is given
+/// in \p Stride, and that have the same type size \p TypeByteSize,
+/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
+/// possible to prove statically that the dependence distance is larger
+/// than the range that the accesses will travel through the execution of
+/// the loop. If so, return true; false otherwise. This is useful for
+/// example in loops such as the following (PR31098):
+/// for (i = 0; i < D; ++i) {
+/// = out[i];
+/// out[i+D] =
+/// }
+static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
+ const SCEV &BackedgeTakenCount,
+ const SCEV &Dist, uint64_t Stride,
+ uint64_t TypeByteSize) {
+
+ // If we can prove that
+ // (**) |Dist| > BackedgeTakenCount * Step
+ // where Step is the absolute stride of the memory accesses in bytes,
+ // then there is no dependence.
+ //
+ // Rationale:
+ // We basically want to check if the absolute distance (|Dist/Step|)
+ // is >= the loop iteration count (or > BackedgeTakenCount).
+ // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
+ // Section 4.2.1); Note, that for vectorization it is sufficient to prove
+ // that the dependence distance is >= VF; This is checked elsewhere.
+ // But in some cases we can prune unknown dependence distances early, and
+ // even before selecting the VF, and without a runtime test, by comparing
+ // the distance against the loop iteration count. Since the vectorized code
+ // will be executed only if LoopCount >= VF, proving distance >= LoopCount
+ // also guarantees that distance >= VF.
+ //
+ const uint64_t ByteStride = Stride * TypeByteSize;
+ const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
+ const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
+
+ const SCEV *CastedDist = &Dist;
+ const SCEV *CastedProduct = Product;
+ uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
+ uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+
+ // The dependence distance can be positive/negative, so we sign extend Dist;
+ // The multiplication of the absolute stride in bytes and the
+ // backedgeTakenCount is non-negative, so we zero extend Product.
+ if (DistTypeSize > ProductTypeSize)
+ CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
+ else
+ CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
+
+ // Is Dist - (BackedgeTakenCount * Step) > 0 ?
+ // (If so, then we have proven (**) because |Dist| >= Dist)
+ const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
+ if (SE.isKnownPositive(Minus))
+ return true;
+
+ // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
+ // (If so, then we have proven (**) because |Dist| >= -1*Dist)
+ const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
+ Minus = SE.getMinusSCEV(NegDist, CastedProduct);
+ if (SE.isKnownPositive(Minus))
+ return true;
+
+ return false;
+}
+
/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
@@ -1292,21 +1339,26 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
+ uint64_t Stride = std::abs(StrideAPtr);
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
+ if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
+ isSafeDependenceDistance(DL, *(PSE.getSE()),
+ *(PSE.getBackedgeTakenCount()), *Dist, Stride,
+ TypeByteSize))
+ return Dependence::NoDep;
+
DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return Dependence::Unknown;
}
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
- auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
- uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
-
const APInt &Val = C->getAPInt();
int64_t Distance = Val.getSExtValue();
- uint64_t Stride = std::abs(StrideAPtr);
// Attempt to prove strided accesses independent.
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
@@ -1427,12 +1479,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
}
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps,
+ MemAccessInfoList &CheckDeps,
const ValueToValueMap &Strides) {
MaxSafeDepDistBytes = -1;
- while (!CheckDeps.empty()) {
- MemAccessInfo CurAccess = *CheckDeps.begin();
+ SmallPtrSet<MemAccessInfo, 8> Visited;
+ for (MemAccessInfo CurAccess : CheckDeps) {
+ if (Visited.count(CurAccess))
+ continue;
// Get the relevant memory access set.
EquivalenceClasses<MemAccessInfo>::iterator I =
@@ -1446,7 +1500,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
// Check every access pair.
while (AI != AE) {
- CheckDeps.erase(*AI);
+ Visited.insert(*AI);
EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
while (OI != AE) {
// Check every accessing instruction pair in program order.
@@ -1885,7 +1939,10 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Value *NewPtr = (Inst && TheLoop->contains(Inst))
? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
: Ptr;
- return {NewPtr, NewPtr};
+ // We must return a half-open range, which means incrementing Sc.
+ const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
+ Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
+ return {NewPtr, NewPtrPlusOne};
} else {
Value *Start = nullptr, *End = nullptr;
DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
index 5be3ee3..e4a0f90 100644
--- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -31,24 +31,10 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
FunctionAnalysisManager::Invalidator &Inv) {
// First compute the sequence of IR units covered by this proxy. We will want
// to visit this in postorder, but because this is a tree structure we can do
- // this by building a preorder sequence and walking it in reverse.
- SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
- // Note that we want to walk the roots in reverse order because we will end
- // up reversing the preorder sequence. However, it happens that the loop nest
- // roots are in reverse order within the LoopInfo object. So we just walk
- // forward here.
- // FIXME: If we change the order of LoopInfo we will want to add a reverse
- // here.
- for (Loop *RootL : *LI) {
- assert(PreOrderWorklist.empty() &&
- "Must start with an empty preorder walk worklist.");
- PreOrderWorklist.push_back(RootL);
- do {
- Loop *L = PreOrderWorklist.pop_back_val();
- PreOrderWorklist.append(L->begin(), L->end());
- PreOrderLoops.push_back(L);
- } while (!PreOrderWorklist.empty());
- }
+ // this by building a preorder sequence and walking it backwards. We also
+ // want siblings in forward program order to match the LoopPassManager so we
+ // get the preorder with siblings reversed.
+ SmallVector<Loop *, 4> PreOrderLoops = LI->getLoopsInReverseSiblingPreorder();
// If this proxy or the loop info is going to be invalidated, we also need
// to clear all the keys coming from that analysis. We also completely blow
@@ -145,7 +131,6 @@ LoopAnalysisManagerFunctionProxy::run(Function &F,
PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PreservedAnalyses PA;
- PA.preserve<AssumptionAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index f449ce9..697b586 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -40,9 +40,9 @@ template class llvm::LoopInfoBase<BasicBlock, Loop>;
// Always verify loopinfo if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
-static bool VerifyLoopInfo = true;
+bool llvm::VerifyLoopInfo = true;
#else
-static bool VerifyLoopInfo = false;
+bool llvm::VerifyLoopInfo = false;
#endif
static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
@@ -131,13 +131,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
PHINode *PN = cast<PHINode>(I);
if (ConstantInt *CI =
dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
- if (CI->isNullValue())
+ if (CI->isZero())
if (Instruction *Inc =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
if (Inc->getOpcode() == Instruction::Add &&
Inc->getOperand(0) == PN)
if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
- if (CI->equalsInt(1))
+ if (CI->isOne())
return PN;
}
return nullptr;
@@ -211,9 +211,11 @@ bool Loop::isSafeToClone() const {
MDNode *Loop::getLoopID() const {
MDNode *LoopID = nullptr;
- if (isLoopSimplifyForm()) {
- LoopID = getLoopLatch()->getTerminator()->getMetadata(LLVMContext::MD_loop);
+ if (BasicBlock *Latch = getLoopLatch()) {
+ LoopID = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
} else {
+ assert(!getLoopLatch() &&
+ "The loop should have no single latch at this point");
// Go through each predecessor of the loop header and check the
// terminator for the metadata.
BasicBlock *H = getHeader();
@@ -248,11 +250,12 @@ void Loop::setLoopID(MDNode *LoopID) const {
assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
- if (isLoopSimplifyForm()) {
- getLoopLatch()->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
+ if (BasicBlock *Latch = getLoopLatch()) {
+ Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
return;
}
+ assert(!getLoopLatch() && "The loop should have no single latch at this point");
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
TerminatorInst *TI = BB->getTerminator();
@@ -457,7 +460,7 @@ protected:
void UnloopUpdater::updateBlockParents() {
if (Unloop.getNumBlocks()) {
// Perform a post order CFG traversal of all blocks within this loop,
- // propagating the nearest loop from sucessors to predecessors.
+ // propagating the nearest loop from successors to predecessors.
LoopBlocksTraversal Traversal(DFS, LI);
for (BasicBlock *POI : Traversal) {
@@ -606,10 +609,19 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+LoopInfo::LoopInfo(const DomTreeBase<BasicBlock> &DomTree) {
analyze(DomTree);
}
+bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<LoopAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
void LoopInfo::markAsRemoved(Loop *Unloop) {
assert(!Unloop->isInvalid() && "Loop has already been removed");
Unloop->invalidate();
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 3f4a079..e988f64 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -54,6 +54,8 @@ public:
}
return false;
}
+
+ StringRef getPassName() const override { return "Print Loop IR"; }
};
char PrintLoopPassWrapper::ID = 0;
@@ -71,30 +73,23 @@ LPPassManager::LPPassManager()
CurrentLoop = nullptr;
}
-// Inset loop into loop nest (LoopInfo) and loop queue (LQ).
-Loop &LPPassManager::addLoop(Loop *ParentLoop) {
- // Create a new loop. LI will take ownership.
- Loop *L = new Loop();
-
- // Insert into the loop nest and the loop queue.
- if (!ParentLoop) {
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::addLoop(Loop &L) {
+ if (!L.getParentLoop()) {
// This is the top level loop.
- LI->addTopLevelLoop(L);
- LQ.push_front(L);
- return *L;
+ LQ.push_front(&L);
+ return;
}
- ParentLoop->addChildLoop(L);
// Insert L into the loop queue after the parent loop.
for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
- if (*I == L->getParentLoop()) {
+ if (*I == L.getParentLoop()) {
// deque does not support insert after.
++I;
- LQ.insert(I, 1, L);
- break;
+ LQ.insert(I, 1, &L);
+ return;
}
}
- return *L;
}
/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index e7a85ae..5c0cbb2 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -10,9 +10,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
index fa0cc5a..4231a78 100644
--- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 2d82740..7327c07 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -37,6 +37,7 @@ enum AllocType : uint8_t {
CallocLike = 1<<2, // allocates + bzero
ReallocLike = 1<<3, // reallocates
StrDupLike = 1<<4,
+ MallocOrCallocLike = MallocLike | CallocLike,
AllocLike = MallocLike | CallocLike | StrDupLike,
AnyAlloc = AllocLike | ReallocLike
};
@@ -50,35 +51,35 @@ struct AllocFnsTy {
// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
// know which functions are nounwind, noalias, nocapture parameters, etc.
-static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = {
- {LibFunc::malloc, {MallocLike, 1, 0, -1}},
- {LibFunc::valloc, {MallocLike, 1, 0, -1}},
- {LibFunc::Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
- {LibFunc::ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
- {LibFunc::Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long)
- {LibFunc::ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow)
- {LibFunc::Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
- {LibFunc::ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
- {LibFunc::Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long)
- {LibFunc::ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow)
- {LibFunc::msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
- {LibFunc::msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
- {LibFunc::msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long)
- {LibFunc::msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow)
- {LibFunc::msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
- {LibFunc::msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
- {LibFunc::msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long)
- {LibFunc::msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow)
- {LibFunc::calloc, {CallocLike, 2, 0, 1}},
- {LibFunc::realloc, {ReallocLike, 2, 1, -1}},
- {LibFunc::reallocf, {ReallocLike, 2, 1, -1}},
- {LibFunc::strdup, {StrDupLike, 1, -1, -1}},
- {LibFunc::strndup, {StrDupLike, 2, 1, -1}}
+static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
+ {LibFunc_malloc, {MallocLike, 1, 0, -1}},
+ {LibFunc_valloc, {MallocLike, 1, 0, -1}},
+ {LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
+ {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
+ {LibFunc_Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long)
+ {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow)
+ {LibFunc_Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
+ {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
+ {LibFunc_Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long)
+ {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow)
+ {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
+ {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
+ {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long)
+ {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow)
+ {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
+ {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
+ {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long)
+ {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow)
+ {LibFunc_calloc, {CallocLike, 2, 0, 1}},
+ {LibFunc_realloc, {ReallocLike, 2, 1, -1}},
+ {LibFunc_reallocf, {ReallocLike, 2, 1, -1}},
+ {LibFunc_strdup, {StrDupLike, 1, -1, -1}},
+ {LibFunc_strndup, {StrDupLike, 2, 1, -1}}
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
-static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
- bool &IsNoBuiltin) {
+static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
+ bool &IsNoBuiltin) {
// Don't care about intrinsics in this case.
if (isa<IntrinsicInst>(V))
return nullptr;
@@ -86,13 +87,13 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
if (LookThroughBitCast)
V = V->stripPointerCasts();
- CallSite CS(const_cast<Value*>(V));
+ ImmutableCallSite CS(V);
if (!CS.getInstruction())
return nullptr;
IsNoBuiltin = CS.isNoBuiltin();
- Function *Callee = CS.getCalledFunction();
+ const Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
return nullptr;
return Callee;
@@ -106,12 +107,12 @@ getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
// Make sure that the function is available.
StringRef FnName = Callee->getName();
- LibFunc::Func TLIFn;
+ LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return None;
const auto *Iter = find_if(
- AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
+ AllocationFnData, [TLIFn](const std::pair<LibFunc, AllocFnsTy> &P) {
return P.first == TLIFn;
});
@@ -183,7 +184,7 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V,
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
- return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias);
+ return CS && CS.hasRetAttr(Attribute::NoAlias);
}
@@ -220,6 +221,14 @@ bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
}
/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory similar to malloc or calloc.
+bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, MallocOrCallocLike, TLI,
+ LookThroughBitCast).hasValue();
+}
+
+/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
@@ -333,33 +342,33 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
return nullptr;
StringRef FnName = Callee->getName();
- LibFunc::Func TLIFn;
+ LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return nullptr;
unsigned ExpectedNumParams;
- if (TLIFn == LibFunc::free ||
- TLIFn == LibFunc::ZdlPv || // operator delete(void*)
- TLIFn == LibFunc::ZdaPv || // operator delete[](void*)
- TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*)
- TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*)
- TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*)
- TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*)
+ if (TLIFn == LibFunc_free ||
+ TLIFn == LibFunc_ZdlPv || // operator delete(void*)
+ TLIFn == LibFunc_ZdaPv || // operator delete[](void*)
+ TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*)
+ TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*)
+ TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*)
+ TLIFn == LibFunc_msvc_delete_array_ptr64) // operator delete[](void*)
ExpectedNumParams = 1;
- else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
- TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
- TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
- TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
- TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
- TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
- TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint)
- TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
- TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
- TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
- TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint)
- TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
- TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
- TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
+ else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint)
+ TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong)
+ TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint)
+ TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong)
+ TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint)
+ TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
+ TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint)
+ TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
+ TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
return nullptr;
@@ -391,13 +400,11 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) {
/// \brief Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of
+/// allocas, byval arguments, and global variables.
bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
- const TargetLibraryInfo *TLI, bool RoundToAlign,
- llvm::ObjSizeMode Mode) {
- ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(),
- RoundToAlign, Mode);
+ const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) {
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
@@ -414,19 +421,23 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
"ObjectSize must be a call to llvm.objectsize!");
bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
- ObjSizeMode Mode;
+ ObjectSizeOpts EvalOptions;
// Unless we have to fold this to something, try to be as accurate as
// possible.
if (MustSucceed)
- Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min;
+ EvalOptions.EvalMode =
+ MaxVal ? ObjectSizeOpts::Mode::Max : ObjectSizeOpts::Mode::Min;
else
- Mode = ObjSizeMode::Exact;
+ EvalOptions.EvalMode = ObjectSizeOpts::Mode::Exact;
+
+ EvalOptions.NullIsUnknownSize =
+ cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne();
// FIXME: Does it make sense to just return a failure value if the size won't
// fit in the output and `!MustSucceed`?
uint64_t Size;
auto *ResultType = cast<IntegerType>(ObjectSize->getType());
- if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) &&
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
isUIntN(ResultType->getBitWidth(), Size))
return ConstantInt::get(ResultType, Size);
@@ -443,7 +454,7 @@ STATISTIC(ObjectVisitorLoad,
APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
- if (RoundToAlign && Align)
+ if (Options.RoundToAlign && Align)
return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align));
return Size;
}
@@ -451,9 +462,8 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
- bool RoundToAlign,
- ObjSizeMode Mode)
- : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign), Mode(Mode) {
+ ObjectSizeOpts Options)
+ : DL(DL), TLI(TLI), Options(Options) {
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
@@ -495,6 +505,22 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
return unknown();
}
+/// When we're compiling N-bit code, and the user uses parameters that are
+/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
+/// trouble with APInt size issues. This function handles resizing + overflow
+/// checks for us. Check and zext or trunc \p I depending on IntTyBits and
+/// I's value.
+bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) {
+ // More bits than we can handle. Checking the bit width isn't necessary, but
+ // it's faster than checking active bits, and should give `false` in the
+ // vast majority of cases.
+ if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
+ return false;
+ if (I.getBitWidth() != IntTyBits)
+ I = I.zextOrTrunc(IntTyBits);
+ return true;
+}
+
SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
@@ -505,8 +531,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
Value *ArraySize = I.getArraySize();
if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
- Size *= C->getValue().zextOrSelf(IntTyBits);
- return std::make_pair(align(Size, I.getAlignment()), Zero);
+ APInt NumElems = C->getValue();
+ if (!CheckedZextOrTrunc(NumElems))
+ return unknown();
+
+ bool Overflow;
+ Size = Size.umul_ov(NumElems, Overflow);
+ return Overflow ? unknown() : std::make_pair(align(Size, I.getAlignment()),
+ Zero);
}
return unknown();
}
@@ -551,21 +583,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
if (!Arg)
return unknown();
- // When we're compiling N-bit code, and the user uses parameters that are
- // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
- // trouble with APInt size issues. This function handles resizing + overflow
- // checks for us.
- auto CheckedZextOrTrunc = [&](APInt &I) {
- // More bits than we can handle. Checking the bit width isn't necessary, but
- // it's faster than checking active bits, and should give `false` in the
- // vast majority of cases.
- if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
- return false;
- if (I.getBitWidth() != IntTyBits)
- I = I.zextOrTrunc(IntTyBits);
- return true;
- };
-
APInt Size = Arg->getValue();
if (!CheckedZextOrTrunc(Size))
return unknown();
@@ -596,7 +613,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
}
SizeOffsetType
-ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) {
+ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) {
+ if (Options.NullIsUnknownSize && CPN.getType()->getAddressSpace() == 0)
+ return unknown();
return std::make_pair(Zero, Zero);
}
@@ -663,12 +682,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
if (TrueResult == FalseResult) {
return TrueSide;
}
- if (Mode == ObjSizeMode::Min) {
+ if (Options.EvalMode == ObjectSizeOpts::Mode::Min) {
if (TrueResult.slt(FalseResult))
return TrueSide;
return FalseSide;
}
- if (Mode == ObjSizeMode::Max) {
+ if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
if (TrueResult.sgt(FalseResult))
return TrueSide;
return FalseSide;
@@ -719,7 +738,10 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
+ ObjectSizeOpts ObjSizeOptions;
+ ObjSizeOptions.RoundToAlign = RoundToAlign;
+
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 66a0d14..263cf42 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,17 +15,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -310,11 +310,11 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
}
static bool isVolatile(Instruction *Inst) {
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI->isVolatile();
- else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->isVolatile();
- else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
return AI->isVolatile();
return false;
}
@@ -691,6 +691,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// load query, we can safely ignore it (scan past it).
if (isLoad)
continue;
+ LLVM_FALLTHROUGH;
default:
// Otherwise, there is a potential dependence. Return a clobber.
return MemDepResult::getClobber(Inst);
diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
index a0ae72f..9db6c49 100644
--- a/contrib/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
@@ -142,9 +142,9 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
- LibFunc::Func F;
+ LibFunc F;
if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc::memset_pattern16 && TLI.has(F)) {
+ F == LibFunc_memset_pattern16 && TLI.has(F)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
if (ArgIdx == 1)
diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp
new file mode 100644
index 0000000..86de474
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp
@@ -0,0 +1,2074 @@
+//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the MemorySSA class.
+//
+//===----------------------------------------------------------------===//
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "memoryssa"
+using namespace llvm;
+INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
+ true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
+ true)
+
+INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
+ "Memory SSA Printer", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
+ "Memory SSA Printer", false, false)
+
+static cl::opt<unsigned> MaxCheckLimit(
+ "memssa-check-limit", cl::Hidden, cl::init(100),
+ cl::desc("The maximum number of stores/phis MemorySSA"
+ "will consider trying to walk past (default = 100)"));
+
+static cl::opt<bool>
+ VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
+ cl::desc("Verify MemorySSA in legacy printer pass."));
+
+namespace llvm {
+/// \brief An assembly annotator class to print Memory SSA information in
+/// comments.
+class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
+ friend class MemorySSA;
+ const MemorySSA *MSSA;
+
+public:
+ MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
+ OS << "; " << *MA << "\n";
+ }
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+ OS << "; " << *MA << "\n";
+ }
+};
+}
+
+namespace {
+/// Our current alias analysis API differentiates heavily between calls and
+/// non-calls, and functions called on one usually assert on the other.
+/// This class encapsulates the distinction to simplify other code that wants
+/// "Memory affecting instructions and related data" to use as a key.
+/// For example, this class is used as a densemap key in the use optimizer.
+class MemoryLocOrCall {
+public:
+ MemoryLocOrCall() : IsCall(false) {}
+ MemoryLocOrCall(MemoryUseOrDef *MUD)
+ : MemoryLocOrCall(MUD->getMemoryInst()) {}
+ MemoryLocOrCall(const MemoryUseOrDef *MUD)
+ : MemoryLocOrCall(MUD->getMemoryInst()) {}
+
+ MemoryLocOrCall(Instruction *Inst) {
+ if (ImmutableCallSite(Inst)) {
+ IsCall = true;
+ CS = ImmutableCallSite(Inst);
+ } else {
+ IsCall = false;
+ // There is no such thing as a memorylocation for a fence inst, and it is
+ // unique in that regard.
+ if (!isa<FenceInst>(Inst))
+ Loc = MemoryLocation::get(Inst);
+ }
+ }
+
+ explicit MemoryLocOrCall(const MemoryLocation &Loc)
+ : IsCall(false), Loc(Loc) {}
+
+ bool IsCall;
+ ImmutableCallSite getCS() const {
+ assert(IsCall);
+ return CS;
+ }
+ MemoryLocation getLoc() const {
+ assert(!IsCall);
+ return Loc;
+ }
+
+ bool operator==(const MemoryLocOrCall &Other) const {
+ if (IsCall != Other.IsCall)
+ return false;
+
+ if (IsCall)
+ return CS.getCalledValue() == Other.CS.getCalledValue();
+ return Loc == Other.Loc;
+ }
+
+private:
+ union {
+ ImmutableCallSite CS;
+ MemoryLocation Loc;
+ };
+};
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<MemoryLocOrCall> {
+ static inline MemoryLocOrCall getEmptyKey() {
+ return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
+ }
+ static inline MemoryLocOrCall getTombstoneKey() {
+ return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
+ }
+ static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
+ if (MLOC.IsCall)
+ return hash_combine(MLOC.IsCall,
+ DenseMapInfo<const Value *>::getHashValue(
+ MLOC.getCS().getCalledValue()));
+ return hash_combine(
+ MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
+ }
+ static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
+ return LHS == RHS;
+ }
+};
+
+enum class Reorderability { Always, IfNoAlias, Never };
+
+/// This does one-way checks to see if Use could theoretically be hoisted above
+/// MayClobber. This will not check the other way around.
+///
+/// This assumes that, for the purposes of MemorySSA, Use comes directly after
+/// MayClobber, with no potentially clobbering operations in between them.
+/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
+static Reorderability getLoadReorderability(const LoadInst *Use,
+ const LoadInst *MayClobber) {
+ bool VolatileUse = Use->isVolatile();
+ bool VolatileClobber = MayClobber->isVolatile();
+ // Volatile operations may never be reordered with other volatile operations.
+ if (VolatileUse && VolatileClobber)
+ return Reorderability::Never;
+
+ // The lang ref allows reordering of volatile and non-volatile operations.
+ // Whether an aliasing nonvolatile load and volatile load can be reordered,
+ // though, is ambiguous. Because it may not be best to exploit this ambiguity,
+ // we only allow volatile/non-volatile reordering if the volatile and
+ // non-volatile operations don't alias.
+ Reorderability Result = VolatileUse || VolatileClobber
+ ? Reorderability::IfNoAlias
+ : Reorderability::Always;
+
+ // If a load is seq_cst, it cannot be moved above other loads. If its ordering
+ // is weaker, it can be moved above other loads. We just need to be sure that
+ // MayClobber isn't an acquire load, because loads can't be moved above
+ // acquire loads.
+ //
+ // Note that this explicitly *does* allow the free reordering of monotonic (or
+ // weaker) loads of the same address.
+ bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+ bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
+ AtomicOrdering::Acquire);
+ if (SeqCstUse || MayClobberIsAcquire)
+ return Reorderability::Never;
+ return Result;
+}
+
+static bool instructionClobbersQuery(MemoryDef *MD,
+ const MemoryLocation &UseLoc,
+ const Instruction *UseInst,
+ AliasAnalysis &AA) {
+ Instruction *DefInst = MD->getMemoryInst();
+ assert(DefInst && "Defining instruction not actually an instruction");
+ ImmutableCallSite UseCS(UseInst);
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+ // These intrinsics will show up as affecting memory, but they are just
+ // markers.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ if (UseCS)
+ return false;
+ return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc);
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::assume:
+ return false;
+ default:
+ break;
+ }
+ }
+
+ if (UseCS) {
+ ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
+ return I != MRI_NoModRef;
+ }
+
+ if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
+ if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) {
+ switch (getLoadReorderability(UseLoad, DefLoad)) {
+ case Reorderability::Always:
+ return false;
+ case Reorderability::Never:
+ return true;
+ case Reorderability::IfNoAlias:
+ return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad));
+ }
+ }
+ }
+
+ return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod;
+}
+
+static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
+ const MemoryLocOrCall &UseMLOC,
+ AliasAnalysis &AA) {
+ // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
+ // to exist while MemoryLocOrCall is pushed through places.
+ if (UseMLOC.IsCall)
+ return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
+ AA);
+ return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
+ AA);
+}
+
+// Return true when MD may alias MU, return false otherwise.
+bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
+ AliasAnalysis &AA) {
+ return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
+}
+}
+
+namespace {
+struct UpwardsMemoryQuery {
+ // True if our original query started off as a call
+ bool IsCall;
+ // The pointer location we started the query with. This will be empty if
+ // IsCall is true.
+ MemoryLocation StartingLoc;
+ // This is the instruction we were querying about.
+ const Instruction *Inst;
+ // The MemoryAccess we actually got called with, used to test local domination
+ const MemoryAccess *OriginalAccess;
+
+ UpwardsMemoryQuery()
+ : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}
+
+ UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
+ : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+ if (!IsCall)
+ StartingLoc = MemoryLocation::get(Inst);
+ }
+};
+
+static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
+ AliasAnalysis &AA) {
+ Instruction *Inst = MD->getMemoryInst();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_end:
+ return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
+ const Instruction *I) {
+ // If the memory can't be changed, then loads of the memory can't be
+ // clobbered.
+ //
+ // FIXME: We should handle invariant groups, as well. It's a bit harder,
+ // because we need to pay close attention to invariant group barriers.
+ return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
+ AA.pointsToConstantMemory(cast<LoadInst>(I)->
+ getPointerOperand()));
+}
+
+/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
+/// inbetween `Start` and `ClobberAt` can clobbers `Start`.
+///
+/// This is meant to be as simple and self-contained as possible. Because it
+/// uses no cache, etc., it can be relatively expensive.
+///
+/// \param Start The MemoryAccess that we want to walk from.
+/// \param ClobberAt A clobber for Start.
+/// \param StartLoc The MemoryLocation for Start.
+/// \param MSSA The MemorySSA isntance that Start and ClobberAt belong to.
+/// \param Query The UpwardsMemoryQuery we used for our search.
+/// \param AA The AliasAnalysis we used for our search.
+static void LLVM_ATTRIBUTE_UNUSED
+checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
+ const MemoryLocation &StartLoc, const MemorySSA &MSSA,
+ const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
+ assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
+
+ if (MSSA.isLiveOnEntryDef(Start)) {
+ assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
+ "liveOnEntry must clobber itself");
+ return;
+ }
+
+ bool FoundClobber = false;
+ DenseSet<MemoryAccessPair> VisitedPhis;
+ SmallVector<MemoryAccessPair, 8> Worklist;
+ Worklist.emplace_back(Start, StartLoc);
+ // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
+ // is found, complain.
+ while (!Worklist.empty()) {
+ MemoryAccessPair MAP = Worklist.pop_back_val();
+ // All we care about is that nothing from Start to ClobberAt clobbers Start.
+ // We learn nothing from revisiting nodes.
+ if (!VisitedPhis.insert(MAP).second)
+ continue;
+
+ for (MemoryAccess *MA : def_chain(MAP.first)) {
+ if (MA == ClobberAt) {
+ if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ // instructionClobbersQuery isn't essentially free, so don't use `|=`,
+ // since it won't let us short-circuit.
+ //
+ // Also, note that this can't be hoisted out of the `Worklist` loop,
+ // since MD may only act as a clobber for 1 of N MemoryLocations.
+ FoundClobber =
+ FoundClobber || MSSA.isLiveOnEntryDef(MD) ||
+ instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
+ }
+ break;
+ }
+
+ // We should never hit liveOnEntry, unless it's the clobber.
+ assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
+
+ if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ (void)MD;
+ assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) &&
+ "Found clobber before reaching ClobberAt!");
+ continue;
+ }
+
+ assert(isa<MemoryPhi>(MA));
+ Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
+ }
+ }
+
+ // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
+ // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
+ assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
+ "ClobberAt never acted as a clobber");
+}
+
+/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
+/// in one class.
+class ClobberWalker {
+ /// Save a few bytes by using unsigned instead of size_t.
+ using ListIndex = unsigned;
+
+ /// Represents a span of contiguous MemoryDefs, potentially ending in a
+ /// MemoryPhi.
+ struct DefPath {
+ MemoryLocation Loc;
+ // Note that, because we always walk in reverse, Last will always dominate
+ // First. Also note that First and Last are inclusive.
+ MemoryAccess *First;
+ MemoryAccess *Last;
+ Optional<ListIndex> Previous;
+
+ DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
+ Optional<ListIndex> Previous)
+ : Loc(Loc), First(First), Last(Last), Previous(Previous) {}
+
+ DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
+ Optional<ListIndex> Previous)
+ : DefPath(Loc, Init, Init, Previous) {}
+ };
+
+ const MemorySSA &MSSA;
+ AliasAnalysis &AA;
+ DominatorTree &DT;
+ UpwardsMemoryQuery *Query;
+
+ // Phi optimization bookkeeping
+ SmallVector<DefPath, 32> Paths;
+ DenseSet<ConstMemoryAccessPair> VisitedPhis;
+
+ /// Find the nearest def or phi that `From` can legally be optimized to.
+ const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
+ assert(From->getNumOperands() && "Phi with no operands?");
+
+ BasicBlock *BB = From->getBlock();
+ MemoryAccess *Result = MSSA.getLiveOnEntryDef();
+ DomTreeNode *Node = DT.getNode(BB);
+ while ((Node = Node->getIDom())) {
+ auto *Defs = MSSA.getBlockDefs(Node->getBlock());
+ if (Defs)
+ return &*Defs->rbegin();
+ }
+ return Result;
+ }
+
+ /// Result of calling walkToPhiOrClobber.
+ struct UpwardsWalkResult {
+ /// The "Result" of the walk. Either a clobber, the last thing we walked, or
+ /// both.
+ MemoryAccess *Result;
+ bool IsKnownClobber;
+ };
+
+ /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
+ /// This will update Desc.Last as it walks. It will (optionally) also stop at
+ /// StopAt.
+ ///
+ /// This does not test for whether StopAt is a clobber
+ UpwardsWalkResult
+ walkToPhiOrClobber(DefPath &Desc,
+ const MemoryAccess *StopAt = nullptr) const {
+ assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
+
+ for (MemoryAccess *Current : def_chain(Desc.Last)) {
+ Desc.Last = Current;
+ if (Current == StopAt)
+ return {Current, false};
+
+ if (auto *MD = dyn_cast<MemoryDef>(Current))
+ if (MSSA.isLiveOnEntryDef(MD) ||
+ instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
+ return {MD, true};
+ }
+
+ assert(isa<MemoryPhi>(Desc.Last) &&
+ "Ended at a non-clobber that's not a phi?");
+ return {Desc.Last, false};
+ }
+
+ void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
+ ListIndex PriorNode) {
+ auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}),
+ upward_defs_end());
+ for (const MemoryAccessPair &P : UpwardDefs) {
+ PausedSearches.push_back(Paths.size());
+ Paths.emplace_back(P.second, P.first, PriorNode);
+ }
+ }
+
+ /// Represents a search that terminated after finding a clobber. This clobber
+ /// may or may not be present in the path of defs from LastNode..SearchStart,
+ /// since it may have been retrieved from cache.
+ struct TerminatedPath {
+ MemoryAccess *Clobber;
+ ListIndex LastNode;
+ };
+
+ /// Get an access that keeps us from optimizing to the given phi.
+ ///
+ /// PausedSearches is an array of indices into the Paths array. Its incoming
+ /// value is the indices of searches that stopped at the last phi optimization
+ /// target. It's left in an unspecified state.
+ ///
+ /// If this returns None, NewPaused is a vector of searches that terminated
+ /// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
+ Optional<TerminatedPath>
+ getBlockingAccess(const MemoryAccess *StopWhere,
+ SmallVectorImpl<ListIndex> &PausedSearches,
+ SmallVectorImpl<ListIndex> &NewPaused,
+ SmallVectorImpl<TerminatedPath> &Terminated) {
+ assert(!PausedSearches.empty() && "No searches to continue?");
+
+ // BFS vs DFS really doesn't make a difference here, so just do a DFS with
+ // PausedSearches as our stack.
+ while (!PausedSearches.empty()) {
+ ListIndex PathIndex = PausedSearches.pop_back_val();
+ DefPath &Node = Paths[PathIndex];
+
+ // If we've already visited this path with this MemoryLocation, we don't
+ // need to do so again.
+ //
+ // NOTE: That we just drop these paths on the ground makes caching
+ // behavior sporadic. e.g. given a diamond:
+ // A
+ // B C
+ // D
+ //
+ // ...If we walk D, B, A, C, we'll only cache the result of phi
+ // optimization for A, B, and D; C will be skipped because it dies here.
+ // This arguably isn't the worst thing ever, since:
+ // - We generally query things in a top-down order, so if we got below D
+ // without needing cache entries for {C, MemLoc}, then chances are
+ // that those cache entries would end up ultimately unused.
+ // - We still cache things for A, so C only needs to walk up a bit.
+ // If this behavior becomes problematic, we can fix without a ton of extra
+ // work.
+ if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
+ continue;
+
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
+ if (Res.IsKnownClobber) {
+ assert(Res.Result != StopWhere);
+ // If this wasn't a cache hit, we hit a clobber when walking. That's a
+ // failure.
+ TerminatedPath Term{Res.Result, PathIndex};
+ if (!MSSA.dominates(Res.Result, StopWhere))
+ return Term;
+
+ // Otherwise, it's a valid thing to potentially optimize to.
+ Terminated.push_back(Term);
+ continue;
+ }
+
+ if (Res.Result == StopWhere) {
+ // We've hit our target. Save this path off for if we want to continue
+ // walking.
+ NewPaused.push_back(PathIndex);
+ continue;
+ }
+
+ assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
+ addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex);
+ }
+
+ return None;
+ }
+
+ template <typename T, typename Walker>
+ struct generic_def_path_iterator
+ : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
+ std::forward_iterator_tag, T *> {
+ generic_def_path_iterator() : W(nullptr), N(None) {}
+ generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
+
+ T &operator*() const { return curNode(); }
+
+ generic_def_path_iterator &operator++() {
+ N = curNode().Previous;
+ return *this;
+ }
+
+ bool operator==(const generic_def_path_iterator &O) const {
+ if (N.hasValue() != O.N.hasValue())
+ return false;
+ return !N.hasValue() || *N == *O.N;
+ }
+
+ private:
+ T &curNode() const { return W->Paths[*N]; }
+
+ Walker *W;
+ Optional<ListIndex> N;
+ };
+
+ using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
+ using const_def_path_iterator =
+ generic_def_path_iterator<const DefPath, const ClobberWalker>;
+
+ iterator_range<def_path_iterator> def_path(ListIndex From) {
+ return make_range(def_path_iterator(this, From), def_path_iterator());
+ }
+
+ iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
+ return make_range(const_def_path_iterator(this, From),
+ const_def_path_iterator());
+ }
+
+ struct OptznResult {
+ /// The path that contains our result.
+ TerminatedPath PrimaryClobber;
+ /// The paths that we can legally cache back from, but that aren't
+ /// necessarily the result of the Phi optimization.
+ SmallVector<TerminatedPath, 4> OtherClobbers;
+ };
+
+ ListIndex defPathIndex(const DefPath &N) const {
+ // The assert looks nicer if we don't need to do &N
+ const DefPath *NP = &N;
+ assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
+ "Out of bounds DefPath!");
+ return NP - &Paths.front();
+ }
+
/// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
/// that act as legal clobbers. Note that this won't return *all* clobbers.
///
/// Phi optimization algorithm tl;dr:
///   - Find the earliest def/phi, A, we can optimize to
///   - Find if all paths from the starting memory access ultimately reach A
///     - If not, optimization isn't possible.
///     - Otherwise, walk from A to another clobber or phi, A'.
///       - If A' is a def, we're done.
///       - If A' is a phi, try to optimize it.
///
/// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
/// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
                           const MemoryLocation &Loc) {
  // Callers must call resetPhiOptznState() between queries; we rely on the
  // per-query state starting out empty.
  assert(Paths.empty() && VisitedPhis.empty() &&
         "Reset the optimization state.");

  Paths.emplace_back(Loc, Start, Phi, None);
  // Stores how many "valid" optimization nodes we had prior to calling
  // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
  auto PriorPathsSize = Paths.size();

  SmallVector<ListIndex, 16> PausedSearches;
  SmallVector<ListIndex, 8> NewPaused;
  SmallVector<TerminatedPath, 4> TerminatedPaths;

  addSearches(Phi, PausedSearches, 0);

  // Moves the TerminatedPath with the "most dominated" Clobber to the end of
  // Paths.
  auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
    assert(!Paths.empty() && "Need a path to move");
    auto Dom = Paths.begin();
    for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I)
      if (!MSSA.dominates(I->Clobber, Dom->Clobber))
        Dom = I;
    auto Last = Paths.end() - 1;
    if (Last != Dom)
      std::iter_swap(Last, Dom);
  };

  MemoryPhi *Current = Phi;
  while (1) {
    assert(!MSSA.isLiveOnEntryDef(Current) &&
           "liveOnEntry wasn't treated as a clobber?");

    const auto *Target = getWalkTarget(Current);
    // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
    // optimization for the prior phi.
    assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
      return MSSA.dominates(P.Clobber, Target);
    }));

    // FIXME: This is broken, because the Blocker may be reported to be
    // liveOnEntry, and we'll happily wait for that to disappear (read: never)
    // For the moment, this is fine, since we do nothing with blocker info.
    if (Optional<TerminatedPath> Blocker = getBlockingAccess(
            Target, PausedSearches, NewPaused, TerminatedPaths)) {

      // Find the node we started at. We can't search based on N->Last, since
      // we may have gone around a loop with a different MemoryLocation.
      auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) {
        return defPathIndex(N) < PriorPathsSize;
      });
      assert(Iter != def_path_iterator());

      DefPath &CurNode = *Iter;
      assert(CurNode.Last == Current);

      // Two things:
      // A. We can't reliably cache all of NewPaused back. Consider a case
      //    where we have two paths in NewPaused; one of which can't optimize
      //    above this phi, whereas the other can. If we cache the second path
      //    back, we'll end up with suboptimal cache entries. We can handle
      //    cases like this a bit better when we either try to find all
      //    clobbers that block phi optimization, or when our cache starts
      //    supporting unfinished searches.
      // B. We can't reliably cache TerminatedPaths back here without doing
      //    extra checks; consider a case like:
      //       T
      //      / \
      //     D   C
      //      \ /
      //       S
      //    Where T is our target, C is a node with a clobber on it, D is a
      //    diamond (with a clobber *only* on the left or right node, N), and
      //    S is our start. Say we walk to D, through the node opposite N
      //    (read: ignoring the clobber), and see a cache entry in the top
      //    node of D. That cache entry gets put into TerminatedPaths. We then
      //    walk up to C (N is later in our worklist), find the clobber, and
      //    quit. If we append TerminatedPaths to OtherClobbers, we'll cache
      //    the bottom part of D to the cached clobber, ignoring the clobber
      //    in N. Again, this problem goes away if we start tracking all
      //    blockers for a given phi optimization.
      TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)};
      return {Result, {}};
    }

    // If there's nothing left to search, then all paths led to valid clobbers
    // that we got from our cache; pick the nearest to the start, and allow
    // the rest to be cached back.
    if (NewPaused.empty()) {
      MoveDominatedPathToEnd(TerminatedPaths);
      TerminatedPath Result = TerminatedPaths.pop_back_val();
      return {Result, std::move(TerminatedPaths)};
    }

    // Advance every paused path one step: each either hits a known clobber
    // (collected in Clobbers) or stops at the next phi/def-chain end.
    MemoryAccess *DefChainEnd = nullptr;
    SmallVector<TerminatedPath, 4> Clobbers;
    for (ListIndex Paused : NewPaused) {
      UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]);
      if (WR.IsKnownClobber)
        Clobbers.push_back({WR.Result, Paused});
      else
        // Micro-opt: If we hit the end of the chain, save it.
        DefChainEnd = WR.Result;
    }

    if (!TerminatedPaths.empty()) {
      // If we couldn't find the dominating phi/liveOnEntry in the above loop,
      // do it now.
      if (!DefChainEnd)
        for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target)))
          DefChainEnd = MA;

      // If any of the terminated paths don't dominate the phi we'll try to
      // optimize, we need to figure out what they are and quit.
      const BasicBlock *ChainBB = DefChainEnd->getBlock();
      for (const TerminatedPath &TP : TerminatedPaths) {
        // Because we know that DefChainEnd is as "high" as we can go, we
        // don't need local dominance checks; BB dominance is sufficient.
        if (DT.dominates(ChainBB, TP.Clobber->getBlock()))
          Clobbers.push_back(TP);
      }
    }

    // If we have clobbers in the def chain, find the one closest to Current
    // and quit.
    if (!Clobbers.empty()) {
      MoveDominatedPathToEnd(Clobbers);
      TerminatedPath Result = Clobbers.pop_back_val();
      return {Result, std::move(Clobbers)};
    }

    assert(all_of(NewPaused,
                  [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));

    // Because liveOnEntry is a clobber, this must be a phi.
    auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd);

    // Re-seed the search state and loop again, now trying to optimize the
    // phi that every surviving path converged on.
    PriorPathsSize = Paths.size();
    PausedSearches.clear();
    for (ListIndex I : NewPaused)
      addSearches(DefChainPhi, PausedSearches, I);
    NewPaused.clear();

    Current = DefChainPhi;
  }
}
+
+ void verifyOptResult(const OptznResult &R) const {
+ assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
+ return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
+ }));
+ }
+
+ void resetPhiOptznState() {
+ Paths.clear();
+ VisitedPhis.clear();
+ }
+
+public:
/// Construct a walker over \p MSSA, using \p AA for alias queries and \p DT
/// for dominance queries. Only references are stored; all three arguments
/// must outlive this walker.
ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
    : MSSA(MSSA), AA(AA), DT(DT) {}
+
+ void reset() {}
+
/// Finds the nearest clobber for the given query, optimizing phis if
/// possible.
MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
  Query = &Q;

  MemoryAccess *Current = Start;
  // This walker pretends uses don't exist. If we're handed one, silently grab
  // its def. (This has the nice side-effect of ensuring we never cache uses)
  if (auto *MU = dyn_cast<MemoryUse>(Start))
    Current = MU->getDefiningAccess();

  DefPath FirstDesc(Q.StartingLoc, Current, Current, None);
  // Fast path for the overly-common case (no crazy phi optimization
  // necessary)
  UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
  MemoryAccess *Result;
  if (WalkResult.IsKnownClobber) {
    Result = WalkResult.Result;
  } else {
    // Slow path: the walk stopped at a phi, so run the phi optimization and
    // then clear the per-query state it accumulated.
    OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
                                        Current, Q.StartingLoc);
    verifyOptResult(OptRes);
    resetPhiOptznState();
    Result = OptRes.PrimaryClobber.Clobber;
  }

#ifdef EXPENSIVE_CHECKS
  checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
  return Result;
}
+
+ void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
+};
+
/// One frame of the explicit-stack dominator-tree walk performed by
/// MemorySSA::renamePass.
struct RenamePassData {
  // Dominator-tree node being visited.
  DomTreeNode *DTN;
  // Next child of DTN to descend into.
  DomTreeNode::const_iterator ChildIt;
  // Memory state live on entry to DTN's block.
  MemoryAccess *IncomingVal;

  RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
                 MemoryAccess *M)
      : DTN(D), ChildIt(It), IncomingVal(M) {}
  void swap(RenamePassData &RHS) {
    std::swap(DTN, RHS.DTN);
    std::swap(ChildIt, RHS.ChildIt);
    std::swap(IncomingVal, RHS.IncomingVal);
  }
};
+} // anonymous namespace
+
+namespace llvm {
/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own,
/// but the name has been retained for the moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
  // The underlying walker that performs the actual upward AA walks.
  ClobberWalker Walker;
  // See setAutoResetWalker below.
  bool AutoResetWalker;

  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
  void verifyRemoved(MemoryAccess *);

public:
  CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
  ~CachingWalker() override;

  using MemorySSAWalker::getClobberingMemoryAccess;
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
                                          const MemoryLocation &) override;
  void invalidateInfo(MemoryAccess *) override;

  /// Whether we call resetClobberWalker() after each time we *actually* walk to
  /// answer a clobber query.
  void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; }

  /// Drop the walker's persistent data structures.
  void resetClobberWalker() { Walker.reset(); }

  void verify(const MemorySSA *MSSA) override {
    MemorySSAWalker::verify(MSSA);
    Walker.verify(MSSA);
  }
};
+
+void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
+ bool RenameAllUses) {
+ // Pass through values to our successors
+ for (const BasicBlock *S : successors(BB)) {
+ auto It = PerBlockAccesses.find(S);
+ // Rename the phi nodes in our successor block
+ if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
+ continue;
+ AccessList *Accesses = It->second.get();
+ auto *Phi = cast<MemoryPhi>(&Accesses->front());
+ if (RenameAllUses) {
+ int PhiIndex = Phi->getBasicBlockIndex(BB);
+ assert(PhiIndex != -1 && "Incomplete phi during partial rename");
+ Phi->setIncomingValue(PhiIndex, IncomingVal);
+ } else
+ Phi->addIncoming(IncomingVal, BB);
+ }
+}
+
+/// \brief Rename a single basic block into MemorySSA form.
+/// Uses the standard SSA renaming algorithm.
+/// \returns The new incoming value.
+MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
+ bool RenameAllUses) {
+ auto It = PerBlockAccesses.find(BB);
+ // Skip most processing if the list is empty.
+ if (It != PerBlockAccesses.end()) {
+ AccessList *Accesses = It->second.get();
+ for (MemoryAccess &L : *Accesses) {
+ if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) {
+ if (MUD->getDefiningAccess() == nullptr || RenameAllUses)
+ MUD->setDefiningAccess(IncomingVal);
+ if (isa<MemoryDef>(&L))
+ IncomingVal = &L;
+ } else {
+ IncomingVal = &L;
+ }
+ }
+ }
+ return IncomingVal;
+}
+
/// \brief This is the standard SSA renaming algorithm.
///
/// We walk the dominator tree in preorder, renaming accesses, and then filling
/// in phi nodes in our successors.
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
                           SmallPtrSetImpl<BasicBlock *> &Visited,
                           bool SkipVisited, bool RenameAllUses) {
  // Explicit stack of (dom-tree node, next child, incoming value) frames; we
  // avoid recursion so deep dominator trees can't overflow the call stack.
  SmallVector<RenamePassData, 32> WorkStack;
  // Skip everything if we already renamed this block and we are skipping.
  // Note: You can't sink this into the if, because we need it to occur
  // regardless of whether we skip blocks or not.
  bool AlreadyVisited = !Visited.insert(Root->getBlock()).second;
  if (SkipVisited && AlreadyVisited)
    return;

  IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses);
  renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses);
  WorkStack.push_back({Root, Root->begin(), IncomingVal});

  while (!WorkStack.empty()) {
    DomTreeNode *Node = WorkStack.back().DTN;
    DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
    IncomingVal = WorkStack.back().IncomingVal;

    if (ChildIt == Node->end()) {
      // All children of this node processed; pop back to the parent frame.
      WorkStack.pop_back();
    } else {
      DomTreeNode *Child = *ChildIt;
      ++WorkStack.back().ChildIt;
      BasicBlock *BB = Child->getBlock();
      // Note: You can't sink this into the if, because we need it to occur
      // regardless of whether we skip blocks or not.
      AlreadyVisited = !Visited.insert(BB).second;
      if (SkipVisited && AlreadyVisited) {
        // We already visited this during our renaming, which can happen when
        // being asked to rename multiple blocks. Figure out the incoming val,
        // which is the last def.
        // Incoming value can only change if there is a block def, and in that
        // case, it's the last block def in the list.
        if (auto *BlockDefs = getWritableBlockDefs(BB))
          IncomingVal = &*BlockDefs->rbegin();
      } else
        IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses);
      renameSuccessorPhis(BB, IncomingVal, RenameAllUses);
      WorkStack.push_back({Child, Child->begin(), IncomingVal});
    }
  }
}
+
/// \brief This handles unreachable block accesses by deleting phi nodes in
/// unreachable blocks, and marking all other unreachable MemoryAccess's as
/// being uses of the live on entry definition.
void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
  assert(!DT->isReachableFromEntry(BB) &&
         "Reachable block found while handling unreachable blocks");

  // Make sure phi nodes in our reachable successors end up with a
  // LiveOnEntryDef for our incoming edge, even though our block is forward
  // unreachable. We could just disconnect these blocks from the CFG fully,
  // but we do not right now.
  for (const BasicBlock *S : successors(BB)) {
    if (!DT->isReachableFromEntry(S))
      continue;
    auto It = PerBlockAccesses.find(S);
    // Rename the phi nodes in our successor block
    if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
      continue;
    AccessList *Accesses = It->second.get();
    auto *Phi = cast<MemoryPhi>(&Accesses->front());
    Phi->addIncoming(LiveOnEntryDef.get(), BB);
  }

  auto It = PerBlockAccesses.find(BB);
  if (It == PerBlockAccesses.end())
    return;

  // Walk BB's own accesses: uses/defs are re-pointed at liveOnEntry, while
  // phis are erased outright (their users get liveOnEntry instead).
  auto &Accesses = It->second;
  for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
    // Grab the next iterator first, since erase invalidates AI.
    auto Next = std::next(AI);
    if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
      UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
    else
      // This is a MemoryPhi; just remove it.
      Accesses->erase(AI);
    AI = Next;
  }
}
+
/// Construct MemorySSA over \p Func; buildMemorySSA does all the real work.
/// \p AA and \p DT are borrowed and must outlive this object.
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
    : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
      // NextID starts at INVALID_MEMORYACCESS_ID, so the first access created
      // (LiveOnEntryDef, in buildMemorySSA) receives that sentinel ID.
      NextID(INVALID_MEMORYACCESS_ID) {
  buildMemorySSA();
}
+
+MemorySSA::~MemorySSA() {
+ // Drop all our references
+ for (const auto &Pair : PerBlockAccesses)
+ for (MemoryAccess &MA : *Pair.second)
+ MA.dropAllReferences();
+}
+
+MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
+ auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
+
+ if (Res.second)
+ Res.first->second = make_unique<AccessList>();
+ return Res.first->second.get();
+}
+MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
+ auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
+
+ if (Res.second)
+ Res.first->second = make_unique<DefsList>();
+ return Res.first->second.get();
+}
+
+/// This class is a batch walker of all MemoryUse's in the program, and points
+/// their defining access at the thing that actually clobbers them. Because it
+/// is a batch walker that touches everything, it does not operate like the
+/// other walkers. This walker is basically performing a top-down SSA renaming
+/// pass, where the version stack is used as the cache. This enables it to be
+/// significantly more time and memory efficient than using the regular walker,
+/// which is walking bottom-up.
+class MemorySSA::OptimizeUses {
+public:
+ OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
+ DominatorTree *DT)
+ : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
+ Walker = MSSA->getWalker();
+ }
+
+ void optimizeUses();
+
+private:
+ /// This represents where a given memorylocation is in the stack.
+ struct MemlocStackInfo {
+ // This essentially is keeping track of versions of the stack. Whenever
+ // the stack changes due to pushes or pops, these versions increase.
+ unsigned long StackEpoch;
+ unsigned long PopEpoch;
+ // This is the lower bound of places on the stack to check. It is equal to
+ // the place the last stack walk ended.
+ // Note: Correctness depends on this being initialized to 0, which densemap
+ // does
+ unsigned long LowerBound;
+ const BasicBlock *LowerBoundBlock;
+ // This is where the last walk for this memory location ended.
+ unsigned long LastKill;
+ bool LastKillValid;
+ };
+ void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
+ SmallVectorImpl<MemoryAccess *> &,
+ DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
+ MemorySSA *MSSA;
+ MemorySSAWalker *Walker;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+};
+
/// Optimize the uses in a given block This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
/// going to be some position in that stack of possible ones.
///
/// We track the stack positions that each MemoryLocation needs
/// to check, and last ended at. This is because we only want to check the
/// things that changed since last time. The same MemoryLocation should
/// get clobbered by the same store (getModRefInfo does not use invariantness or
/// things like this, and if they start, we can modify MemoryLocOrCall to
/// include relevant data)
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  /// If no accesses, nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(BackBlock, BB))
      break;
    // Pop the whole run of accesses from the non-dominating block at once.
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(&MA);
    if (!MU) {
      // Defs and phis change the memory version: push them on the stack.
      VersionStack.push_back(&MA);
      ++StackEpoch;
      continue;
    }

    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
      continue;
    }

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from top of
    // stack due to changing blocks. We may have to reset the lower bound or
    // last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but only is slower cases with heavily
      // branching dominator trees. To get the optimal number of queries would
      // be to make lowerbound and lastkill a per-loc stack, and pop it until
      // the top of that stack dominates us. This does not seem worth it ATM.
      // A much cheaper optimization would be to always explore the deepest
      // branch of the dominator tree first. This will guarantee this resets on
      // the smallest set of blocks.
      if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
          !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
        // Reset the lower bound of things to check.
        // TODO: Some day we should be able to reset to last kill, rather than
        // 0.
        LocInfo.LowerBound = 0;
        LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
        LocInfo.LastKillValid = false;
      }
    } else if (LocInfo.StackEpoch != StackEpoch) {
      // If all that has changed is the StackEpoch, we only have to check the
      // new things on the stack, because we've checked everything before. In
      // this case, the lower bound of things to check remains the same.
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
    }
    if (!LocInfo.LastKillValid) {
      LocInfo.LastKill = VersionStack.size() - 1;
      LocInfo.LastKillValid = true;
    }

    // At this point, we should have corrected last kill and LowerBound to be
    // in bounds.
    assert(LocInfo.LowerBound < VersionStack.size() &&
           "Lower bound out of range");
    assert(LocInfo.LastKill < VersionStack.size() &&
           "Last kill info out of range");
    // In any case, the new upper bound is the top of the stack.
    unsigned long UpperBound = VersionStack.size() - 1;

    if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
      DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
                   << *(MU->getMemoryInst()) << ")"
                   << " because there are " << UpperBound - LocInfo.LowerBound
                   << " stores to disambiguate\n");
      // Because we did not walk, LastKill is no longer valid, as this may
      // have been a kill.
      LocInfo.LastKillValid = false;
      continue;
    }
    // Walk down the stack from the top toward the lower bound, looking for
    // the first access that clobbers this use.
    bool FoundClobberResult = false;
    while (UpperBound > LocInfo.LowerBound) {
      if (isa<MemoryPhi>(VersionStack[UpperBound])) {
        // For phis, use the walker, see where we ended up, go there
        Instruction *UseInst = MU->getMemoryInst();
        MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
        // We are guaranteed to find it or something is wrong
        while (VersionStack[UpperBound] != Result) {
          assert(UpperBound != 0);
          --UpperBound;
        }
        FoundClobberResult = true;
        break;
      }

      MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
      // If the lifetime of the pointer ends at this instruction, it's live on
      // entry.
      if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
        // Reset UpperBound to liveOnEntryDef's place in the stack
        UpperBound = 0;
        FoundClobberResult = true;
        break;
      }
      if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) {
        FoundClobberResult = true;
        break;
      }
      --UpperBound;
    }
    // At the end of this loop, UpperBound is either a clobber, or lower bound
    // PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
    if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
      MU->setDefiningAccess(VersionStack[UpperBound], true);
      // We were last killed now by where we got to
      LocInfo.LastKill = UpperBound;
    } else {
      // Otherwise, we checked all the new ones, and now we know we can get to
      // LastKill.
      MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true);
    }
    LocInfo.LowerBound = VersionStack.size() - 1;
    LocInfo.LowerBoundBlock = BB;
  }
}
+
+/// Optimize uses to point to their actual clobbering definitions.
+void MemorySSA::OptimizeUses::optimizeUses() {
+ SmallVector<MemoryAccess *, 16> VersionStack;
+ DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
+ VersionStack.push_back(MSSA->getLiveOnEntryDef());
+
+ unsigned long StackEpoch = 1;
+ unsigned long PopEpoch = 1;
+ // We perform a non-recursive top-down dominator tree walk.
+ for (const auto *DomNode : depth_first(DT->getRootNode()))
+ optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
+ LocStackInfo);
+}
+
+void MemorySSA::placePHINodes(
+ const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks,
+ const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) {
+ // Determine where our MemoryPhi's should go
+ ForwardIDFCalculator IDFs(*DT);
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ IDFs.calculate(IDFBlocks);
+
+ std::sort(IDFBlocks.begin(), IDFBlocks.end(),
+ [&BBNumbers](const BasicBlock *A, const BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
+
+ // Now place MemoryPhi nodes.
+ for (auto &BB : IDFBlocks)
+ createMemoryPhi(BB);
+}
+
/// Build the whole MemorySSA form for F: create accesses for every memory
/// instruction, place MemoryPhis, run SSA renaming, then optimize all uses.
void MemorySSA::buildMemorySSA() {
  // We create an access to represent "live on entry", for things like
  // arguments or users of globals, where the memory they use is defined before
  // the beginning of the function. We do not actually insert it into the IR.
  // We do not define a live on exit for the immediate uses, and thus our
  // semantics do *not* imply that something with no immediate uses can simply
  // be removed.
  BasicBlock &StartingPoint = F.getEntryBlock();
  LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
                                          &StartingPoint, NextID++);
  // Block numbering used later to get a deterministic phi placement order.
  DenseMap<const BasicBlock *, unsigned int> BBNumbers;
  unsigned NextBBNum = 0;

  // We maintain lists of memory accesses per-block, trading memory for time. We
  // could just look up the memory access for every possible instruction in the
  // stream.
  SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
  // Go through each block, figure out where defs occur, and chain together all
  // the accesses.
  for (BasicBlock &B : F) {
    BBNumbers[&B] = NextBBNum++;
    bool InsertIntoDef = false;
    AccessList *Accesses = nullptr;
    DefsList *Defs = nullptr;
    for (Instruction &I : B) {
      MemoryUseOrDef *MUD = createNewAccess(&I);
      if (!MUD)
        continue;

      // Lists are allocated lazily so blocks with no accesses cost nothing.
      if (!Accesses)
        Accesses = getOrCreateAccessList(&B);
      Accesses->push_back(MUD);
      if (isa<MemoryDef>(MUD)) {
        InsertIntoDef = true;
        if (!Defs)
          Defs = getOrCreateDefsList(&B);
        Defs->push_back(*MUD);
      }
    }
    if (InsertIntoDef)
      DefiningBlocks.insert(&B);
  }
  placePHINodes(DefiningBlocks, BBNumbers);

  // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
  // filled in with all blocks.
  SmallPtrSet<BasicBlock *, 16> Visited;
  renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);

  CachingWalker *Walker = getWalkerImpl();

  // We're doing a batch of updates; don't drop useful caches between them.
  Walker->setAutoResetWalker(false);
  OptimizeUses(this, Walker, AA, DT).optimizeUses();
  Walker->setAutoResetWalker(true);
  Walker->resetClobberWalker();

  // Mark the uses in unreachable blocks as live on entry, so that they go
  // somewhere.
  for (auto &BB : F)
    if (!Visited.count(&BB))
      markUnreachableAsLiveOnEntry(&BB);
}
+
+MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
+
+MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
+ if (Walker)
+ return Walker.get();
+
+ Walker = make_unique<CachingWalker>(this, AA, DT);
+ return Walker.get();
+}
+
// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
                                        const BasicBlock *BB,
                                        InsertionPlace Point) {
  auto *Accesses = getOrCreateAccessList(BB);
  if (Point == Beginning) {
    // If it's a phi node, it goes first, otherwise, it goes after any phi
    // nodes.
    if (isa<MemoryPhi>(NewAccess)) {
      Accesses->push_front(NewAccess);
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_front(*NewAccess);
    } else {
      // Skip past the leading run of phis; the new access goes right after it.
      auto AI = find_if_not(
          *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
      Accesses->insert(AI, NewAccess);
      if (!isa<MemoryUse>(NewAccess)) {
        // Defs also live in the defs list; insert after its phis too.
        auto *Defs = getOrCreateDefsList(BB);
        auto DI = find_if_not(
            *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
        Defs->insert(DI, *NewAccess);
      }
    }
  } else {
    Accesses->push_back(NewAccess);
    if (!isa<MemoryUse>(NewAccess)) {
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_back(*NewAccess);
    }
  }
  // Local numbering is now stale for this block; recompute on demand.
  BlockNumberingValid.erase(BB);
}
+
// Insert What into BB's access list immediately before InsertPt, and keep the
// defs list consistent with it.
void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
                                      AccessList::iterator InsertPt) {
  auto *Accesses = getWritableBlockAccesses(BB);
  bool WasEnd = InsertPt == Accesses->end();
  Accesses->insert(AccessList::iterator(InsertPt), What);
  if (!isa<MemoryUse>(What)) {
    auto *Defs = getOrCreateDefsList(BB);
    // If we got asked to insert at the end, we have an easy job, just shove it
    // at the end. If we got asked to insert before an existing def, we also get
    // an iterator. If we got asked to insert before a use, we have to hunt for
    // the next def.
    if (WasEnd) {
      Defs->push_back(*What);
    } else if (isa<MemoryDef>(InsertPt)) {
      Defs->insert(InsertPt->getDefsIterator(), *What);
    } else {
      while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt))
        ++InsertPt;
      // Either we found a def, or we are inserting at the end
      if (InsertPt == Accesses->end())
        Defs->push_back(*What);
      else
        Defs->insert(InsertPt->getDefsIterator(), *What);
    }
  }
  // Local numbering is now stale for this block; recompute on demand.
  BlockNumberingValid.erase(BB);
}
+
// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       AccessList::iterator Where) {
  // Keep it in the lookup tables, remove from the lists
  removeFromLists(What, false);
  What->setBlock(BB);
  insertIntoListsBefore(What, BB, Where);
}
+
// Same as the iterator-based moveTo above, but positions What at a coarse
// InsertionPlace (Beginning/End) in BB rather than a specific iterator.
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       InsertionPlace Point) {
  removeFromLists(What, false);
  What->setBlock(BB);
  insertIntoListsForBlock(What, BB, Point);
}
+
/// Create a fresh MemoryPhi for \p BB, register it in the lists and the
/// value-to-access map, and return it. BB must not already have one.
MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
  assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
  MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
  // Phi's always are placed at the front of the block.
  insertIntoListsForBlock(Phi, BB, Beginning);
  // Phis are keyed by their block in the lookup map.
  ValueToMemoryAccess[BB] = Phi;
  return Phi;
}
+
/// Create a MemoryUse/MemoryDef for \p I with its defining access already set
/// to \p Definition. \p I must actually touch memory, and must not be a PHI.
/// Note: the access is *not* inserted into any block lists here.
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
                                               MemoryAccess *Definition) {
  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
  MemoryUseOrDef *NewAccess = createNewAccess(I);
  assert(
      NewAccess != nullptr &&
      "Tried to create a memory access for a non-memory touching instruction");
  NewAccess->setDefiningAccess(Definition);
  return NewAccess;
}
+
+// Return true if the instruction has ordering constraints.
+// Note specifically that this only considers stores and loads
+// because others are still considered ModRef by getModRefInfo.
+static inline bool isOrdered(const Instruction *I) {
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (!SI->isUnordered())
+ return true;
+ } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isUnordered())
+ return true;
+ }
+ return false;
+}
+/// \brief Helper function to create new memory accesses
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+ // The assume intrinsic has a control dependency which we model by claiming
+ // that it writes arbitrarily. Ignore that fake memory dependency here.
+ // FIXME: Replace this special casing with a more accurate modelling of
+ // assume's control dependency.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ return nullptr;
+
+ // Find out what affect this instruction has on memory.
+ ModRefInfo ModRef = AA->getModRefInfo(I);
+ // The isOrdered check is used to ensure that volatiles end up as defs
+ // (atomics end up as ModRef right now anyway). Until we separate the
+ // ordering chain from the memory chain, this enables people to see at least
+ // some relative ordering to volatiles. Note that getClobberingMemoryAccess
+ // will still give an answer that bypasses other volatile loads. TODO:
+ // Separate memory aliasing and ordering into two different chains so that we
+ // can precisely represent both "what memory will this read/write/is clobbered
+ // by" and "what instructions can I move this past".
+ bool Def = bool(ModRef & MRI_Mod) || isOrdered(I);
+ bool Use = bool(ModRef & MRI_Ref);
+
+ // It's possible for an instruction to not modify memory at all. During
+ // construction, we ignore them.
+ if (!Def && !Use)
+ return nullptr;
+
+ assert((Def || Use) &&
+ "Trying to create a memory access with a non-memory instruction");
+
+ MemoryUseOrDef *MUD;
+ if (Def)
+ MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
+ else
+ MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
+ ValueToMemoryAccess[I] = MUD;
+ return MUD;
+}
+
+/// \brief Returns true if \p Replacer dominates \p Replacee .
+bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
+                             const MemoryAccess *Replacee) const {
+  // For a use or def, the use occurs in the access's own block.
+  if (isa<MemoryUseOrDef>(Replacee))
+    return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
+  const auto *MP = cast<MemoryPhi>(Replacee);
+  // For a phi node, the use occurs in the predecessor block of the phi node.
+  // Since we may occur multiple times in the phi node, we have to check each
+  // operand to ensure Replacer dominates each operand where Replacee occurs.
+  for (const Use &Arg : MP->operands()) {
+    if (Arg.get() != Replacee &&
+        !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
+      return false;
+  }
+  return true;
+}
+
+/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
+void MemorySSA::removeFromLookups(MemoryAccess *MA) {
+  assert(MA->use_empty() &&
+         "Trying to remove memory access that still has uses");
+  BlockNumbering.erase(MA);
+  // Drop the use of the defining access so MA no longer sits on any use list.
+  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
+    MUD->setDefiningAccess(nullptr);
+  // Invalidate our walker's cache if necessary
+  if (!isa<MemoryUse>(MA))
+    Walker->invalidateInfo(MA);
+  // The call below to erase will destroy MA, so we can't change the order we
+  // are doing things here
+  Value *MemoryInst;
+  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+    MemoryInst = MUD->getMemoryInst();
+  } else {
+    // MemoryPhis are keyed by their basic block rather than an instruction.
+    MemoryInst = MA->getBlock();
+  }
+  auto VMA = ValueToMemoryAccess.find(MemoryInst);
+  if (VMA->second == MA)
+    ValueToMemoryAccess.erase(VMA);
+}
+
+/// \brief Properly remove \p MA from all of MemorySSA's lists.
+///
+/// Because of the way the intrusive list and use lists work, it is important to
+/// do removal in the right order.
+/// ShouldDelete defaults to true, and will cause the memory access to also be
+/// deleted, not just removed.
+void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
+  // The access list owns the reference, so we erase it from the non-owning list
+  // first. MemoryUses never appear on the defs list.
+  if (!isa<MemoryUse>(MA)) {
+    auto DefsIt = PerBlockDefs.find(MA->getBlock());
+    std::unique_ptr<DefsList> &Defs = DefsIt->second;
+    Defs->remove(*MA);
+    if (Defs->empty())
+      PerBlockDefs.erase(DefsIt);
+  }
+
+  // The erase call here will delete it. If we don't want it deleted, we call
+  // remove instead.
+  auto AccessIt = PerBlockAccesses.find(MA->getBlock());
+  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
+  if (ShouldDelete)
+    Accesses->erase(MA);
+  else
+    Accesses->remove(MA);
+
+  // Drop the per-block list entirely once it goes empty.
+  if (Accesses->empty())
+    PerBlockAccesses.erase(AccessIt);
+}
+
+void MemorySSA::print(raw_ostream &OS) const {
+  // Print F with every instruction/block annotated by its memory access.
+  MemorySSAAnnotatedWriter Writer(this);
+  F.print(OS, &Writer);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
+#endif
+
+/// \brief Run all of MemorySSA's internal consistency checks.
+void MemorySSA::verifyMemorySSA() const {
+  verifyDefUses(F);
+  verifyDomination(F);
+  verifyOrdering(F);
+  Walker->verify(this);
+}
+
+/// \brief Verify that the order and existence of MemoryAccesses matches the
+/// order and existence of memory affecting instructions.
+void MemorySSA::verifyOrdering(Function &F) const {
+  // Walk all the blocks, comparing what the lookups think and what the access
+  // lists think, as well as the order in the blocks vs the order in the access
+  // lists.
+  SmallVector<MemoryAccess *, 32> ActualAccesses;
+  SmallVector<MemoryAccess *, 32> ActualDefs;
+  for (BasicBlock &B : F) {
+    const AccessList *AL = getBlockAccesses(&B);
+    const auto *DL = getBlockDefs(&B);
+    MemoryAccess *Phi = getMemoryAccess(&B);
+    // A phi, if present, counts as both an access and a def.
+    if (Phi) {
+      ActualAccesses.push_back(Phi);
+      ActualDefs.push_back(Phi);
+    }
+
+    for (Instruction &I : B) {
+      MemoryAccess *MA = getMemoryAccess(&I);
+      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
+             "We have memory affecting instructions "
+             "in this block but they are not in the "
+             "access list or defs list");
+      if (MA) {
+        ActualAccesses.push_back(MA);
+        if (isa<MemoryDef>(MA))
+          ActualDefs.push_back(MA);
+      }
+    }
+    // Either we hit the assert, really have no accesses, or we have both
+    // accesses and an access list.
+    // Same with defs.
+    if (!AL && !DL)
+      continue;
+    assert(AL->size() == ActualAccesses.size() &&
+           "We don't have the same number of accesses in the block as on the "
+           "access list");
+    assert((DL || ActualDefs.size() == 0) &&
+           "Either we should have a defs list, or we should have no defs");
+    assert((!DL || DL->size() == ActualDefs.size()) &&
+           "We don't have the same number of defs in the block as on the "
+           "def list");
+    // Walk the two sequences in lockstep to verify they agree element-wise.
+    auto ALI = AL->begin();
+    auto AAI = ActualAccesses.begin();
+    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
+      assert(&*ALI == *AAI && "Not the same accesses in the same order");
+      ++ALI;
+      ++AAI;
+    }
+    ActualAccesses.clear();
+    if (DL) {
+      auto DLI = DL->begin();
+      auto ADI = ActualDefs.begin();
+      while (DLI != DL->end() && ADI != ActualDefs.end()) {
+        assert(&*DLI == *ADI && "Not the same defs in the same order");
+        ++DLI;
+        ++ADI;
+      }
+    }
+    ActualDefs.clear();
+  }
+}
+
+/// \brief Verify the domination properties of MemorySSA by checking that each
+/// definition dominates all of its uses. Compiled out entirely under NDEBUG.
+void MemorySSA::verifyDomination(Function &F) const {
+#ifndef NDEBUG
+  for (BasicBlock &B : F) {
+    // Phi nodes are attached to basic blocks
+    if (MemoryPhi *MP = getMemoryAccess(&B))
+      for (const Use &U : MP->uses())
+        assert(dominates(MP, U) && "Memory PHI does not dominate it's uses");
+
+    for (Instruction &I : B) {
+      // Only defs impose dominance obligations; plain uses define nothing.
+      MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
+      if (!MD)
+        continue;
+
+      for (const Use &U : MD->uses())
+        assert(dominates(MD, U) && "Memory Def does not dominate it's uses");
+    }
+  }
+#endif
+}
+
+/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
+/// appears in the use list of \p Def.
+
+void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
+#ifndef NDEBUG
+  // The live-on-entry def is the only legitimate reason for Def to be null.
+  if (!Def)
+    assert(isLiveOnEntryDef(Use) &&
+           "Null def but use not point to live on entry def");
+  else
+    assert(is_contained(Def->users(), Use) &&
+           "Did not find use in def's use list");
+#endif
+}
+
+/// \brief Verify the immediate use information, by walking all the memory
+/// accesses and verifying that, for each use, it appears in the
+/// appropriate def's use list
+void MemorySSA::verifyDefUses(Function &F) const {
+  for (BasicBlock &B : F) {
+    // Phi nodes are attached to basic blocks
+    if (MemoryPhi *Phi = getMemoryAccess(&B)) {
+      // A phi must carry exactly one incoming value per CFG predecessor.
+      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
+                                          pred_begin(&B), pred_end(&B))) &&
+             "Incomplete MemoryPhi Node");
+      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+    }
+
+    for (Instruction &I : B) {
+      if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
+        verifyUseInDefs(MA->getDefiningAccess(), MA);
+      }
+    }
+  }
+}
+
+/// \brief Return the MemoryUseOrDef associated with \p I, or null if none is
+/// recorded for it.
+MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
+  return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
+}
+
+/// \brief Return the MemoryPhi attached to \p BB, or null if it has none.
+MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
+  return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+}
+
+/// Perform a local numbering on blocks so that instruction ordering can be
+/// determined in constant time.
+/// TODO: We currently just number in order. If we numbered by N, we could
+/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
+/// log2(N) sequences of mixed before and after) without needing to invalidate
+/// the numbering.
+void MemorySSA::renumberBlock(const BasicBlock *B) const {
+  // The pre-increment ensures the numbers really start at 1.
+  unsigned long CurrentNumber = 0;
+  const AccessList *AL = getBlockAccesses(B);
+  assert(AL != nullptr && "Asking to renumber an empty block");
+  for (const auto &I : *AL)
+    BlockNumbering[&I] = ++CurrentNumber;
+  BlockNumberingValid.insert(B);
+}
+
+/// \brief Determine, for two memory accesses in the same block,
+/// whether \p Dominator dominates \p Dominatee.
+/// \returns True if \p Dominator dominates \p Dominatee.
+bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
+                                 const MemoryAccess *Dominatee) const {
+
+  const BasicBlock *DominatorBlock = Dominator->getBlock();
+
+  assert((DominatorBlock == Dominatee->getBlock()) &&
+         "Asking for local domination when accesses are in different blocks!");
+  // A node dominates itself.
+  if (Dominatee == Dominator)
+    return true;
+
+  // When Dominatee is defined on function entry, it is not dominated by another
+  // memory access.
+  if (isLiveOnEntryDef(Dominatee))
+    return false;
+
+  // When Dominator is defined on function entry, it dominates the other memory
+  // access.
+  if (isLiveOnEntryDef(Dominator))
+    return true;
+
+  // Block numbering is computed lazily, the first time a local query is made.
+  if (!BlockNumberingValid.count(DominatorBlock))
+    renumberBlock(DominatorBlock);
+
+  unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
+  // All numbers start with 1
+  assert(DominatorNum != 0 && "Block was not numbered properly");
+  unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
+  assert(DominateeNum != 0 && "Block was not numbered properly");
+  return DominatorNum < DominateeNum;
+}
+
+/// \brief Returns true if \p Dominator dominates \p Dominatee.
+bool MemorySSA::dominates(const MemoryAccess *Dominator,
+                          const MemoryAccess *Dominatee) const {
+  if (Dominator == Dominatee)
+    return true;
+
+  // Other than itself (handled above), nothing dominates the live-on-entry
+  // def's position.
+  if (isLiveOnEntryDef(Dominatee))
+    return false;
+
+  if (Dominator->getBlock() != Dominatee->getBlock())
+    return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
+  return locallyDominates(Dominator, Dominatee);
+}
+
+/// \brief Returns true if \p Dominator dominates the use \p Dominatee.
+bool MemorySSA::dominates(const MemoryAccess *Dominator,
+                          const Use &Dominatee) const {
+  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
+    BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
+    // The def must dominate the incoming block of the phi.
+    if (UseBB != Dominator->getBlock())
+      return DT->dominates(Dominator->getBlock(), UseBB);
+    // If the UseBB and the DefBB are the same, compare locally.
+    return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
+  }
+  // If it's not a PHI node use, the normal dominates can already handle it.
+  return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
+}
+
+const static char LiveOnEntryStr[] = "liveOnEntry";
+
+void MemoryAccess::print(raw_ostream &OS) const {
+  // Manual dispatch on the value ID to the concrete printer.
+  switch (getValueID()) {
+  case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
+  case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
+  case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
+  }
+  llvm_unreachable("invalid value id");
+}
+
+void MemoryDef::print(raw_ostream &OS) const {
+  MemoryAccess *UO = getDefiningAccess();
+
+  OS << getID() << " = MemoryDef(";
+  // A null defining access, or one whose ID is 0, prints as liveOnEntry.
+  if (UO && UO->getID())
+    OS << UO->getID();
+  else
+    OS << LiveOnEntryStr;
+  OS << ')';
+}
+
+void MemoryPhi::print(raw_ostream &OS) const {
+  bool First = true;
+  OS << getID() << " = MemoryPhi(";
+  // Print each incoming operand as {block,accessID}.
+  for (const auto &Op : operands()) {
+    BasicBlock *BB = getIncomingBlock(Op);
+    MemoryAccess *MA = cast<MemoryAccess>(Op);
+    if (!First)
+      OS << ',';
+    else
+      First = false;
+
+    OS << '{';
+    if (BB->hasName())
+      OS << BB->getName();
+    else
+      BB->printAsOperand(OS, false);
+    OS << ',';
+    if (unsigned ID = MA->getID())
+      OS << ID;
+    else
+      OS << LiveOnEntryStr;
+    OS << '}';
+  }
+  OS << ')';
+}
+
+void MemoryUse::print(raw_ostream &OS) const {
+  MemoryAccess *UO = getDefiningAccess();
+  OS << "MemoryUse(";
+  // As above, ID 0 / null defining access is shown as liveOnEntry.
+  if (UO && UO->getID())
+    OS << UO->getID();
+  else
+    OS << LiveOnEntryStr;
+  OS << ')';
+}
+
+void MemoryAccess::dump() const {
+// Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  print(dbgs());
+  dbgs() << "\n";
+#endif
+}
+
+char MemorySSAPrinterLegacyPass::ID = 0;
+
+MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
+  initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
+}
+
+void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MemorySSAWrapperPass>();
+}
+
+bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
+  auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+  MSSA.print(dbgs());
+  if (VerifyMemorySSA)
+    MSSA.verifyMemorySSA();
+  // Printing never mutates the IR.
+  return false;
+}
+
+AnalysisKey MemorySSAAnalysis::Key;
+
+/// \brief Construct MemorySSA for \p F under the new pass manager.
+MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
+                                                 FunctionAnalysisManager &AM) {
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &AA = AM.getResult<AAManager>(F);
+  return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT));
+}
+
+PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
+                                            FunctionAnalysisManager &AM) {
+  OS << "MemorySSA for function: " << F.getName() << "\n";
+  AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);
+
+  return PreservedAnalyses::all();
+}
+
+PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
+                                             FunctionAnalysisManager &AM) {
+  AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();
+
+  return PreservedAnalyses::all();
+}
+
+char MemorySSAWrapperPass::ID = 0;
+
+MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
+  initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
+
+void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  // MemorySSA stores pointers into these analyses (see runOnFunction), so
+  // they must stay alive as long as the cached MemorySSA does.
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<AAResultsWrapperPass>();
+}
+
+bool MemorySSAWrapperPass::runOnFunction(Function &F) {
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  MSSA.reset(new MemorySSA(F, &AA, &DT));
+  // Analysis pass: never mutates the IR.
+  return false;
+}
+
+void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
+
+void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
+  MSSA->print(OS);
+}
+
+MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
+
+MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
+                                        DominatorTree *D)
+    : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {}
+
+MemorySSA::CachingWalker::~CachingWalker() {}
+
+void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
+  // Only uses/defs carry a cached optimized access; phis have nothing to drop.
+  if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+    MUD->resetOptimized();
+}
+
+/// \brief Walk the use-def chains starting at \p MA and find
+/// the MemoryAccess that actually clobbers Loc.
+///
+/// \returns our clobbering memory access
+MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
+  MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
+#ifdef EXPENSIVE_CHECKS
+  // Re-run the query and make sure the answer is stable.
+  MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
+  assert(NewNoCache == New && "Cache made us hand back a different result?");
+  (void)NewNoCache;
+#endif
+  if (AutoResetWalker)
+    resetClobberWalker();
+  return New;
+}
+
+MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
+  // A phi has no single memory instruction to reason about.
+  if (isa<MemoryPhi>(StartingAccess))
+    return StartingAccess;
+
+  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
+  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
+    return StartingUseOrDef;
+
+  Instruction *I = StartingUseOrDef->getMemoryInst();
+
+  // Conservatively, fences are always clobbers, so don't perform the walk if we
+  // hit a fence.
+  if (!ImmutableCallSite(I) && I->isFenceLike())
+    return StartingUseOrDef;
+
+  UpwardsMemoryQuery Q;
+  Q.OriginalAccess = StartingUseOrDef;
+  Q.StartingLoc = Loc;
+  Q.Inst = I;
+  Q.IsCall = false;
+
+  // Unlike the other function, do not walk to the def of a def, because we are
+  // handed something we already believe is the clobbering access.
+  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
+                                     ? StartingUseOrDef->getDefiningAccess()
+                                     : StartingUseOrDef;
+
+  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
+  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *StartingUseOrDef << "\n");
+  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *Clobber << "\n");
+  return Clobber;
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
+  // If this is a MemoryPhi, we can't do anything.
+  if (!StartingAccess)
+    return MA;
+
+  // If this is an already optimized use or def, return the optimized result.
+  // Note: Currently, we do not store the optimized def result because we'd need
+  // a separate field, since we can't use it as the defining access.
+  if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
+    if (MUD->isOptimized())
+      return MUD->getOptimized();
+
+  const Instruction *I = StartingAccess->getMemoryInst();
+  UpwardsMemoryQuery Q(I, StartingAccess);
+  // We can't sanely do anything with fences, they conservatively
+  // clobber all memory, and have no locations to get pointers from to
+  // try to disambiguate.
+  if (!Q.IsCall && I->isFenceLike())
+    return StartingAccess;
+
+  if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
+    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
+    if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
+      MUD->setOptimized(LiveOnEntry);
+    return LiveOnEntry;
+  }
+
+  // Start with the thing we already think clobbers this location
+  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+
+  // At this point, DefiningAccess may be the live on entry def.
+  // If it is, we will not get a better result.
+  if (MSSA->isLiveOnEntryDef(DefiningAccess))
+    return DefiningAccess;
+
+  MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
+  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *DefiningAccess << "\n");
+  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
+  DEBUG(dbgs() << *Result << "\n");
+  // Cache the answer; the isOptimized() check above hands it back next time.
+  if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
+    MUD->setOptimized(Result);
+
+  return Result;
+}
+
+MemoryAccess *
+DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+  // Trivial walker: answer with the defining access, no alias queries at all.
+  if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
+    return Use->getDefiningAccess();
+  return MA;
+}
+
+MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
+    MemoryAccess *StartingAccess, const MemoryLocation &) {
+  // The location is ignored entirely; same trivial answer as above.
+  if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
+    return Use->getDefiningAccess();
+  return StartingAccess;
+}
+} // namespace llvm
+
+// Out-of-line deletion hooks invoked through DerivedUser.
+void MemoryPhi::deleteMe(DerivedUser *Self) {
+  delete static_cast<MemoryPhi *>(Self);
+}
+
+void MemoryDef::deleteMe(DerivedUser *Self) {
+  delete static_cast<MemoryDef *>(Self);
+}
+
+void MemoryUse::deleteMe(DerivedUser *Self) {
+  delete static_cast<MemoryUse *>(Self);
+}
diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
new file mode 100644
index 0000000..1ff8447
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -0,0 +1,488 @@
+//===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the MemorySSAUpdater class.
+//
+//===----------------------------------------------------------------===//
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "memoryssa"
+using namespace llvm;
+
+// This is the marker algorithm from "Simple and Efficient Construction of
+// Static Single Assignment Form"
+// The simple, non-marker algorithm places phi nodes at any join
+// Here, we place markers, and only place phi nodes if they end up necessary.
+// They are only necessary if they break a cycle (IE we recursively visit
+// ourselves again), or we discover, while getting the value of the operands,
+// that there are two or more definitions needing to be merged.
+// This still will leave non-minimal form in the case of irreducible control
+// flow, where phi nodes may be in cycles with themselves, but unnecessary.
+MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
+  // Single predecessor case, just recurse, we can only have one definition.
+  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+    return getPreviousDefFromEnd(Pred);
+  } else if (VisitedBlocks.count(BB)) {
+    // We hit our node again, meaning we had a cycle, we must insert a phi
+    // node to break it so we have an operand. The only case this will
+    // insert useless phis is if we have irreducible control flow.
+    return MSSA->createMemoryPhi(BB);
+  } else if (VisitedBlocks.insert(BB).second) {
+    // Mark us visited so we can detect a cycle
+    SmallVector<MemoryAccess *, 8> PhiOps;
+
+    // Recurse to get the values in our predecessors for placement of a
+    // potential phi node. This will insert phi nodes if we cycle in order to
+    // break the cycle and have an operand.
+    for (auto *Pred : predecessors(BB))
+      PhiOps.push_back(getPreviousDefFromEnd(Pred));
+
+    // Now try to simplify the ops to avoid placing a phi.
+    // This may return null if we never created a phi yet, that's okay
+    MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
+    bool PHIExistsButNeedsUpdate = false;
+    // See if the existing phi operands match what we need.
+    // Unlike normal SSA, we only allow one phi node per block, so we can't just
+    // create a new one.
+    if (Phi && Phi->getNumOperands() != 0)
+      if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
+        PHIExistsButNeedsUpdate = true;
+      }
+
+    // See if we can avoid the phi by simplifying it.
+    auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
+    // If we couldn't simplify, we may have to create a phi
+    if (Result == Phi) {
+      if (!Phi)
+        Phi = MSSA->createMemoryPhi(BB);
+
+      // These will have been filled in by the recursive read we did above.
+      if (PHIExistsButNeedsUpdate) {
+        // Overwrite the stale operands/blocks in place.
+        std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+        std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
+      } else {
+        unsigned i = 0;
+        for (auto *Pred : predecessors(BB))
+          Phi->addIncoming(PhiOps[i++], Pred);
+      }
+
+      Result = Phi;
+    }
+    if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Result))
+      InsertedPHIs.push_back(MP);
+    // Set ourselves up for the next variable by resetting visited state.
+    VisitedBlocks.erase(BB);
+    return Result;
+  }
+  llvm_unreachable("Should have hit one of the three cases above");
+}
+
+// This starts at the memory access, and goes backwards in the block to find
+// the previous definition. If a definition is not found in the block of the
+// access, it continues globally, creating phi nodes to ensure we have a single
+// definition.
+MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
+  auto *LocalResult = getPreviousDefInBlock(MA);
+
+  return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock());
+}
+
+// This starts at the memory access, and goes backwards in the block to find
+// the previous definition. If the definition is not found in the block of the
+// access, it returns nullptr.
+MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
+  auto *Defs = MSSA->getWritableBlockDefs(MA->getBlock());
+
+  // It's possible there are no defs, or we got handed the first def to start.
+  if (Defs) {
+    // If this is a def, we can just use the def iterators.
+    if (!isa<MemoryUse>(MA)) {
+      auto Iter = MA->getReverseDefsIterator();
+      ++Iter;
+      if (Iter != Defs->rend())
+        return &*Iter;
+    } else {
+      // Otherwise, have to walk the all access iterator.
+      auto End = MSSA->getWritableBlockAccesses(MA->getBlock())->rend();
+      for (auto &U : make_range(++MA->getReverseIterator(), End))
+        if (!isa<MemoryUse>(U))
+          return cast<MemoryAccess>(&U);
+      // Note that if MA comes before Defs->begin(), we won't hit a def.
+      return nullptr;
+    }
+  }
+  return nullptr;
+}
+
+// This starts at the end of the block: if the block has any defs, the last
+// one is the previous def; otherwise fall back to the global search.
+MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) {
+  auto *Defs = MSSA->getWritableBlockDefs(BB);
+
+  if (Defs)
+    return &*Defs->rbegin();
+
+  return getPreviousDefRecursive(BB);
+}
+// Recurse over a set of phi uses to eliminate the trivial ones
+MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
+  if (!Phi)
+    return nullptr;
+  // Res is a tracking handle: if Phi is RAUW'd while we simplify its users,
+  // Res follows the replacement value.
+  TrackingVH<MemoryAccess> Res(Phi);
+  // Snapshot the users first, since tryRemoveTrivialPhi may mutate use lists.
+  SmallVector<TrackingVH<Value>, 8> Uses;
+  std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses));
+  for (auto &U : Uses) {
+    if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) {
+      auto OperRange = UsePhi->operands();
+      tryRemoveTrivialPhi(UsePhi, OperRange);
+    }
+  }
+  return Res;
+}
+
+// Eliminate trivial phis
+// Phis are trivial if they are defined either by themselves, or all the same
+// argument.
+// IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c)
+// We recursively try to remove them.
+template <class RangeType>
+MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
+                                                    RangeType &Operands) {
+  // Detect equal or self arguments
+  MemoryAccess *Same = nullptr;
+  for (auto &Op : Operands) {
+    // If the same or self, good so far
+    if (Op == Phi || Op == Same)
+      continue;
+    // not the same, return the phi since it's not eliminatable by us
+    if (Same)
+      return Phi;
+    Same = cast<MemoryAccess>(Op);
+  }
+  // Never found a non-self reference, the phi is undef
+  if (Same == nullptr)
+    return MSSA->getLiveOnEntryDef();
+  // Phi may be null when called speculatively from getPreviousDefRecursive.
+  if (Phi) {
+    Phi->replaceAllUsesWith(Same);
+    removeMemoryAccess(Phi);
+  }
+
+  // We should only end up recursing in case we replaced something, in which
+  // case, we may have made other Phis trivial.
+  return recursePhi(Same);
+}
+
+// Hook up a newly-placed MemoryUse to its defining access.
+void MemorySSAUpdater::insertUse(MemoryUse *MU) {
+  InsertedPHIs.clear();
+  MU->setDefiningAccess(getPreviousDef(MU));
+  // Unlike for defs, there is no extra work to do. Because uses do not create
+  // new may-defs, there are only two cases:
+  //
+  // 1. There was a def already below us, and therefore, we should not have
+  // created a phi node because it was already needed for the def.
+  //
+  // 2. There is no def below us, and therefore, there is no extra renaming work
+  // to do.
+}
+
+// Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef.
+static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
+                                      MemoryAccess *NewDef) {
+  // Replace the value of every operand whose incoming block is BB with the
+  // new defining access. BB may appear more than once (multi-edges); the
+  // loop below assumes those entries are contiguous — TODO confirm.
+  int i = MP->getBasicBlockIndex(BB);
+  assert(i != -1 && "Should have found the basic block in the phi");
+  // We can't just compare i against getNumOperands since one is signed and the
+  // other not. So use it to index into the block iterator.
+  for (auto BBIter = MP->block_begin() + i; BBIter != MP->block_end();
+       ++BBIter) {
+    if (*BBIter != BB)
+      break;
+    MP->setIncomingValue(i, NewDef);
+    ++i;
+  }
+}
+
+// A brief description of the algorithm:
+// First, we compute what should define the new def, using the SSA
+// construction algorithm.
+// Then, we update the defs below us (and any new phi nodes) in the graph to
+// point to the correct new defs, to ensure we only have one variable, and no
+// disconnected stores.
+void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
+  InsertedPHIs.clear();
+
+  // See if we had a local def, and if not, go hunting.
+  MemoryAccess *DefBefore = getPreviousDefInBlock(MD);
+  bool DefBeforeSameBlock = DefBefore != nullptr;
+  if (!DefBefore)
+    DefBefore = getPreviousDefRecursive(MD->getBlock());
+
+  // There is a def before us, which means we can replace any store/phi uses
+  // of that thing with us, since we are in the way of whatever was there
+  // before.
+  // We now define that def's memorydefs and memoryphis
+  if (DefBeforeSameBlock) {
+    for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
+         UI != UE;) {
+      Use &U = *UI++;
+      // Leave the uses alone
+      if (isa<MemoryUse>(U.getUser()))
+        continue;
+      U.set(MD);
+    }
+  }
+
+  // and that def is now our defining access.
+  // We change them in this order otherwise we will appear in the use list
+  // above and reset ourselves.
+  MD->setDefiningAccess(DefBefore);
+
+  SmallVector<MemoryAccess *, 8> FixupList(InsertedPHIs.begin(),
+                                           InsertedPHIs.end());
+  if (!DefBeforeSameBlock) {
+    // If there was a local def before us, we must have the same effect it
+    // did. Because every may-def is the same, any phis/etc we would create, it
+    // would also have created. If there was no local def before us, we
+    // performed a global update, and have to search all successors and make
+    // sure we update the first def in each of them (following all paths until
+    // we hit the first def along each path). This may also insert phi nodes.
+    // TODO: There are other cases we can skip this work, such as when we have a
+    // single successor, and only used a straight line of single pred blocks
+    // backwards to find the def. To make that work, we'd have to track whether
+    // getDefRecursive only ever used the single predecessor case. These types
+    // of paths also only exist in between CFG simplifications.
+    FixupList.push_back(MD);
+  }
+
+  // Iterate until fixups stop producing new phis.
+  while (!FixupList.empty()) {
+    unsigned StartingPHISize = InsertedPHIs.size();
+    fixupDefs(FixupList);
+    FixupList.clear();
+    // Put any new phis on the fixup list, and process them
+    FixupList.append(InsertedPHIs.end() - StartingPHISize, InsertedPHIs.end());
+  }
+  // Now that all fixups are done, rename all uses if we are asked.
+  if (RenameUses) {
+    SmallPtrSet<BasicBlock *, 16> Visited;
+    BasicBlock *StartBlock = MD->getBlock();
+    // We are guaranteed there is a def in the block, because we just got it
+    // handed to us in this function.
+    MemoryAccess *FirstDef = &*MSSA->getWritableBlockDefs(StartBlock)->begin();
+    // Convert to incoming value if it's a memorydef. A phi *is* already an
+    // incoming value.
+    // NOTE(review): this inner MD deliberately shadows the parameter; it
+    // refers to FirstDef only within this if.
+    if (auto *MD = dyn_cast<MemoryDef>(FirstDef))
+      FirstDef = MD->getDefiningAccess();
+
+    MSSA->renamePass(MD->getBlock(), FirstDef, Visited);
+    // We just inserted a phi into this block, so the incoming value will become
+    // the phi anyway, so it does not matter what we pass.
+    for (auto *MP : InsertedPHIs)
+      MSSA->renamePass(MP->getBlock(), nullptr, Visited);
+  }
+}
+
+// For each newly-placed def/phi in Vars, rewire the next def downstream (in
+// the same block or across the CFG) to use it as its defining access.
+void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) {
+  SmallPtrSet<const BasicBlock *, 8> Seen;
+  SmallVector<const BasicBlock *, 16> Worklist;
+  for (auto *NewDef : Vars) {
+    // First, see if there is a local def after the operand.
+    auto *Defs = MSSA->getWritableBlockDefs(NewDef->getBlock());
+    auto DefIter = NewDef->getDefsIterator();
+
+    // If there is a local def after us, we only have to rename that.
+    if (++DefIter != Defs->end()) {
+      cast<MemoryDef>(DefIter)->setDefiningAccess(NewDef);
+      continue;
+    }
+
+    // Otherwise, we need to search down through the CFG.
+    // For each of our successors, handle it directly if there is a phi, or
+    // place on the fixup worklist.
+    for (const auto *S : successors(NewDef->getBlock())) {
+      if (auto *MP = MSSA->getMemoryAccess(S))
+        setMemoryPhiValueForBlock(MP, NewDef->getBlock(), NewDef);
+      else
+        Worklist.push_back(S);
+    }
+
+    while (!Worklist.empty()) {
+      const BasicBlock *FixupBlock = Worklist.back();
+      Worklist.pop_back();
+
+      // Get the first def in the block that isn't a phi node.
+      if (auto *Defs = MSSA->getWritableBlockDefs(FixupBlock)) {
+        auto *FirstDef = &*Defs->begin();
+        // The loop above and below should have taken care of phi nodes
+        assert(!isa<MemoryPhi>(FirstDef) &&
+               "Should have already handled phi nodes!");
+        // We are now this def's defining access, make sure we actually dominate
+        // it
+        assert(MSSA->dominates(NewDef, FirstDef) &&
+               "Should have dominated the new access");
+
+        // This may insert new phi nodes, because we are not guaranteed the
+        // block we are processing has a single pred, and depending where the
+        // store was inserted, it may require phi nodes below it.
+        cast<MemoryDef>(FirstDef)->setDefiningAccess(getPreviousDef(FirstDef));
+        // NOTE(review): this returns from fixupDefs entirely, abandoning any
+        // remaining worklist entries and remaining Vars — confirm intended.
+        return;
+      }
+      // We didn't find a def, so we must continue.
+      for (const auto *S : successors(FixupBlock)) {
+        // If there is a phi node, handle it.
+        // Otherwise, put the block on the worklist
+        if (auto *MP = MSSA->getMemoryAccess(S))
+          setMemoryPhiValueForBlock(MP, FixupBlock, NewDef);
+        else {
+          // If we cycle, we should have ended up at a phi node that we already
+          // processed. FIXME: Double check this
+          if (!Seen.insert(S).second)
+            continue;
+          Worklist.push_back(S);
+        }
+      }
+    }
+  }
+}
+
+// Move What before Where in the MemorySSA IR.
+template <class WhereType>
+void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+ WhereType Where) {
+ // Replace all our users with our defining access.
+ What->replaceAllUsesWith(What->getDefiningAccess());
+
+ // Let MemorySSA take care of moving it around in the lists.
+ MSSA->moveTo(What, BB, Where);
+
+ // Now reinsert it into the IR and do whatever fixups needed.
+ if (auto *MD = dyn_cast<MemoryDef>(What))
+ insertDef(MD);
+ else
+ insertUse(cast<MemoryUse>(What));
+}
+
+// Move What before Where in the MemorySSA IR.
+void MemorySSAUpdater::moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where) {
+ moveTo(What, Where->getBlock(), Where->getIterator());
+}
+
+// Move What after Where in the MemorySSA IR.
+void MemorySSAUpdater::moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where) {
+ moveTo(What, Where->getBlock(), ++Where->getIterator());
+}
+
+void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
+ MemorySSA::InsertionPlace Where) {
+ return moveTo(What, BB, Where);
+}
+
+/// \brief If all arguments of a MemoryPHI are defined by the same incoming
+/// argument, return that argument.
+static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
+ MemoryAccess *MA = nullptr;
+
+ for (auto &Arg : MP->operands()) {
+ if (!MA)
+ MA = cast<MemoryAccess>(Arg);
+ else if (MA != Arg)
+ return nullptr;
+ }
+ return MA;
+}
+
+void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
+ assert(!MSSA->isLiveOnEntryDef(MA) &&
+ "Trying to remove the live on entry def");
+ // We can only delete phi nodes if they have no uses, or we can replace all
+ // uses with a single definition.
+ MemoryAccess *NewDefTarget = nullptr;
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
+ // Note that it is sufficient to know that all edges of the phi node have
+ // the same argument. If they do, by the definition of dominance frontiers
+ // (which we used to place this phi), that argument must dominate this phi,
+ // and thus, must dominate the phi's uses, and so we will not hit the assert
+ // below.
+ NewDefTarget = onlySingleValue(MP);
+ assert((NewDefTarget || MP->use_empty()) &&
+ "We can't delete this memory phi");
+ } else {
+ NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
+ }
+
+ // Re-point the uses at our defining access
+ if (!isa<MemoryUse>(MA) && !MA->use_empty()) {
+ // Reset optimized on users of this store, and reset the uses.
+ // A few notes:
+ // 1. This is a slightly modified version of RAUW to avoid walking the
+ // uses twice here.
+ // 2. If we wanted to be complete, we would have to reset the optimized
+ // flags on users of phi nodes if doing the below makes a phi node have all
+ // the same arguments. Instead, we prefer users to removeMemoryAccess those
+ // phi nodes, because doing it here would be N^3.
+ if (MA->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget);
+ // Note: We assume MemorySSA is not used in metadata since it's not really
+ // part of the IR.
+
+ while (!MA->use_empty()) {
+ Use &U = *MA->use_begin();
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser()))
+ MUD->resetOptimized();
+ U.set(NewDefTarget);
+ }
+ }
+
+ // The call below to erase will destroy MA, so we can't change the order we
+ // are doing things here
+ MSSA->removeFromLookups(MA);
+ MSSA->removeFromLists(MA);
+}
+
+MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
+ Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
+ MemorySSA::InsertionPlace Point) {
+ MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
+ MSSA->insertIntoListsForBlock(NewAccess, BB, Point);
+ return NewAccess;
+}
+
+MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessBefore(
+ Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
+ MSSA->insertIntoListsBefore(NewAccess, InsertPt->getBlock(),
+ InsertPt->getIterator());
+ return NewAccess;
+}
+
+MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter(
+ Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
+ MSSA->insertIntoListsBefore(NewAccess, InsertPt->getBlock(),
+ ++InsertPt->getIterator());
+ return NewAccess;
+}
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f675830..e12cdf9 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index f5ba637..e9e354e 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -28,7 +28,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -37,7 +37,8 @@ using namespace llvm;
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
-static void findRefEdges(const User *CurUser, SetVector<ValueInfo> &RefEdges,
+static void findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
+ SetVector<ValueInfo> &RefEdges,
SmallPtrSet<const User *, 8> &Visited) {
SmallVector<const User *, 32> Worklist;
Worklist.push_back(CurUser);
@@ -61,7 +62,7 @@ static void findRefEdges(const User *CurUser, SetVector<ValueInfo> &RefEdges,
// the reference set unless it is a callee. Callees are handled
// specially by WriteFunction and are added to a separate list.
if (!(CS && CS.isCallee(&OI)))
- RefEdges.insert(GV);
+ RefEdges.insert(Index.getOrInsertValueInfo(GV));
continue;
}
Worklist.push_back(Operand);
@@ -84,6 +85,92 @@ static bool isNonRenamableLocal(const GlobalValue &GV) {
return GV.hasSection() && GV.hasLocalLinkage();
}
+/// Determine whether this call has all constant integer arguments (excluding
+/// "this") and summarize it to VCalls or ConstVCalls as appropriate.
+static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
+ SetVector<FunctionSummary::VFuncId> &VCalls,
+ SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
+ std::vector<uint64_t> Args;
+ // Start from the second argument to skip the "this" pointer.
+ for (auto &Arg : make_range(Call.CS.arg_begin() + 1, Call.CS.arg_end())) {
+ auto *CI = dyn_cast<ConstantInt>(Arg);
+ if (!CI || CI->getBitWidth() > 64) {
+ VCalls.insert({Guid, Call.Offset});
+ return;
+ }
+ Args.push_back(CI->getZExtValue());
+ }
+ ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
+}
+
+/// If this intrinsic call requires that we add information to the function
+/// summary, do so via the non-constant reference arguments.
+static void addIntrinsicToSummary(
+ const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests,
+ SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
+ SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
+ SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
+ SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls) {
+ switch (CI->getCalledFunction()->getIntrinsicID()) {
+ case Intrinsic::type_test: {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!TypeId)
+ break;
+ GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
+
+ // Produce a summary from type.test intrinsics. We only summarize type.test
+ // intrinsics that are used other than by an llvm.assume intrinsic.
+ // Intrinsics that are assumed are relevant only to the devirtualization
+ // pass, not the type test lowering pass.
+ bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
+ auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
+ if (!AssumeCI)
+ return true;
+ Function *F = AssumeCI->getCalledFunction();
+ return !F || F->getIntrinsicID() != Intrinsic::assume;
+ });
+ if (HasNonAssumeUses)
+ TypeTests.insert(Guid);
+
+ SmallVector<DevirtCallSite, 4> DevirtCalls;
+ SmallVector<CallInst *, 4> Assumes;
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ for (auto &Call : DevirtCalls)
+ addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
+ TypeTestAssumeConstVCalls);
+
+ break;
+ }
+
+ case Intrinsic::type_checked_load: {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
+ auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!TypeId)
+ break;
+ GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
+
+ SmallVector<DevirtCallSite, 4> DevirtCalls;
+ SmallVector<Instruction *, 4> LoadedPtrs;
+ SmallVector<Instruction *, 4> Preds;
+ bool HasNonCallUses = false;
+ findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
+ HasNonCallUses, CI);
+ // Any non-call uses of the result of llvm.type.checked.load will
+ // prevent us from optimizing away the llvm.type.test.
+ if (HasNonCallUses)
+ TypeTests.insert(Guid);
+ for (auto &Call : DevirtCalls)
+ addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
+ TypeCheckedLoadConstVCalls);
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
static void
computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
@@ -99,6 +186,10 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
SetVector<ValueInfo> RefEdges;
SetVector<GlobalValue::GUID> TypeTests;
+ SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
+ TypeCheckedLoadVCalls;
+ SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
+ TypeCheckedLoadConstVCalls;
ICallPromotionAnalysis ICallAnalysis;
bool HasInlineAsmMaybeReferencingInternal = false;
@@ -108,7 +199,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (isa<DbgInfoIntrinsic>(I))
continue;
++NumInsts;
- findRefEdges(&I, RefEdges, Visited);
+ findRefEdges(Index, &I, RefEdges, Visited);
auto CS = ImmutableCallSite(&I);
if (!CS)
continue;
@@ -133,29 +224,15 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Check if this is a direct call to a known function or a known
// intrinsic, or an indirect call with profile data.
if (CalledFunction) {
- if (CalledFunction->isIntrinsic()) {
- if (CalledFunction->getIntrinsicID() != Intrinsic::type_test)
- continue;
- // Produce a summary from type.test intrinsics. We only summarize
- // type.test intrinsics that are used other than by an llvm.assume
- // intrinsic. Intrinsics that are assumed are relevant only to the
- // devirtualization pass, not the type test lowering pass.
- bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
- auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
- if (!AssumeCI)
- return true;
- Function *F = AssumeCI->getCalledFunction();
- return !F || F->getIntrinsicID() != Intrinsic::assume;
- });
- if (HasNonAssumeUses) {
- auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
- if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()))
- TypeTests.insert(GlobalValue::getGUID(TypeId->getString()));
- }
+ if (CI && CalledFunction->isIntrinsic()) {
+ addIntrinsicToSummary(
+ CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
+ TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls);
+ continue;
}
// We should have named any anonymous globals
assert(CalledFunction->hasName());
- auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+ auto ScaledCount = PSI->getProfileCount(&I, BFI);
auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
: CalleeInfo::HotnessType::Unknown;
@@ -163,7 +240,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// to record the call edge to the alias in that case. Eventually
// an alias summary will be created to associate the alias and
// aliasee.
- CallGraphEdges[cast<GlobalValue>(CalledValue)].updateHotness(Hotness);
+ CallGraphEdges[Index.getOrInsertValueInfo(
+ cast<GlobalValue>(CalledValue))]
+ .updateHotness(Hotness);
} else {
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
@@ -178,11 +257,17 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
ICallAnalysis.getPromotionCandidatesForInstruction(
&I, NumVals, TotalCount, NumCandidates);
for (auto &Candidate : CandidateProfileData)
- CallGraphEdges[Candidate.Value].updateHotness(
- getHotness(Candidate.Count, PSI));
+ CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
+ .updateHotness(getHotness(Candidate.Count, PSI));
}
}
+ // Explicit add hot edges to enforce importing for designated GUIDs for
+ // sample PGO, to enable the same inlines as the profiled optimized binary.
+ for (auto &I : F.getImportGUIDs())
+ CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
+ CalleeInfo::HotnessType::Critical);
+
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
@@ -190,10 +275,13 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// FIXME: refactor this to use the same code that inliner is using.
F.isVarArg();
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
- TypeTests.takeVector());
+ TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
+ TypeCheckedLoadVCalls.takeVector(),
+ TypeTestAssumeConstVCalls.takeVector(),
+ TypeCheckedLoadConstVCalls.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(F.getGUID());
Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
@@ -204,10 +292,10 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
- findRefEdges(&V, RefEdges, Visited);
+ findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
if (NonRenamableLocal)
@@ -220,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
@@ -233,18 +321,16 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
// Set LiveRoot flag on entries matching the given value name.
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
- auto SummaryList =
- Index.findGlobalValueSummaryList(GlobalValue::getGUID(Name));
- if (SummaryList == Index.end())
- return;
- for (auto &Summary : SummaryList->second)
- Summary->setLiveRoot();
+ if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
+ for (auto &Summary : VI.getSummaryList())
+ Summary->setLive(true);
}
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
const Module &M,
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
ProfileSummaryInfo *PSI) {
+ assert(PSI);
ModuleSummaryIndex Index;
// Identify the local values in the llvm.used and llvm.compiler.used sets,
@@ -326,9 +412,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// be listed on the llvm.used or llvm.compiler.used global and marked as
// referenced from there.
ModuleSymbolTable::CollectAsmSymbols(
- Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
- [&M, &Index, &CantBePromoted](StringRef Name,
- object::BasicSymbolRef::Flags Flags) {
+ M, [&M, &Index, &CantBePromoted](StringRef Name,
+ object::BasicSymbolRef::Flags Flags) {
// Symbols not marked as Weak or Global are local definitions.
if (Flags & (object::BasicSymbolRef::SF_Weak |
object::BasicSymbolRef::SF_Global))
@@ -338,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
return;
assert(GV->isDeclaration() && "Def in module asm already has definition");
GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
- /* NotEligibleToImport */ true,
- /* LiveRoot */ true);
+ /* NotEligibleToImport = */ true,
+ /* Live = */ true);
CantBePromoted.insert(GlobalValue::getGUID(Name));
// Create the appropriate summary type.
if (isa<Function>(GV)) {
@@ -347,7 +432,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
llvm::make_unique<FunctionSummary>(
GVFlags, 0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
- ArrayRef<GlobalValue::GUID>{});
+ ArrayRef<GlobalValue::GUID>{},
+ ArrayRef<FunctionSummary::VFuncId>{},
+ ArrayRef<FunctionSummary::VFuncId>{},
+ ArrayRef<FunctionSummary::ConstVCall>{},
+ ArrayRef<FunctionSummary::ConstVCall>{});
Index.addGlobalValueSummary(Name, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
@@ -358,13 +447,27 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
});
}
+ bool IsThinLTO = true;
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+ IsThinLTO = MD->getZExtValue();
+
for (auto &GlobalList : Index) {
- assert(GlobalList.second.size() == 1 &&
+ // Ignore entries for references that are undefined in the current module.
+ if (GlobalList.second.SummaryList.empty())
+ continue;
+
+ assert(GlobalList.second.SummaryList.size() == 1 &&
"Expected module's index to have one summary per GUID");
- auto &Summary = GlobalList.second[0];
+ auto &Summary = GlobalList.second.SummaryList[0];
+ if (!IsThinLTO) {
+ Summary->setNotEligibleToImport();
+ continue;
+ }
+
bool AllRefsCanBeExternallyReferenced =
llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
- return !CantBePromoted.count(VI.getValue()->getGUID());
+ return !CantBePromoted.count(VI.getGUID());
});
if (!AllRefsCanBeExternallyReferenced) {
Summary->setNotEligibleToImport();
@@ -374,9 +477,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
bool AllCallsCanBeExternallyReferenced = llvm::all_of(
FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
- auto GUID = Edge.first.isGUID() ? Edge.first.getGUID()
- : Edge.first.getValue()->getGUID();
- return !CantBePromoted.count(GUID);
+ return !CantBePromoted.count(Edge.first.getGUID());
});
if (!AllCallsCanBeExternallyReferenced)
Summary->setNotEligibleToImport();
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
index 1e75c08..f374dd3 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -20,8 +20,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ObjCARCInstKind.h"
-#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
index fa8b07d..eb259fd 100644
--- a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -23,14 +23,14 @@
using namespace llvm;
-OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
: F(F), BFI(nullptr) {
- if (!F->getContext().getDiagnosticHotnessRequested())
+ if (!F->getContext().getDiagnosticsHotnessRequested())
return;
// First create a dominator tree.
DominatorTree DT;
- DT.recalculate(*F);
+ DT.recalculate(*const_cast<Function *>(F));
// Generate LoopInfo from it.
LoopInfo LI;
@@ -45,6 +45,18 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
BFI = OwnedBFI.get();
}
+bool OptimizationRemarkEmitter::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // This analysis has no state and so can be trivially preserved but it needs
+ // a fresh view of BFI if it was constructed with one.
+ if (BFI && Inv.invalidate<BlockFrequencyAnalysis>(F, PA))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
if (!BFI)
return None;
@@ -55,53 +67,59 @@ Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
namespace llvm {
namespace yaml {
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
- static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
- assert(io.outputting() && "input not yet implemented");
+void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
+ IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (io.mapTag("!Passed",
+ (OptDiag->getKind() == DK_OptimizationRemark ||
+ OptDiag->getKind() == DK_MachineOptimizationRemark)))
+ ;
+ else if (io.mapTag(
+ "!Missed",
+ (OptDiag->getKind() == DK_OptimizationRemarkMissed ||
+ OptDiag->getKind() == DK_MachineOptimizationRemarkMissed)))
+ ;
+ else if (io.mapTag(
+ "!Analysis",
+ (OptDiag->getKind() == DK_OptimizationRemarkAnalysis ||
+ OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis)))
+ ;
+ else if (io.mapTag("!AnalysisFPCommute",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisFPCommute))
+ ;
+ else if (io.mapTag("!AnalysisAliasing",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisAliasing))
+ ;
+ else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure))
+ ;
+ else
+ llvm_unreachable("Unknown remark type");
- if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark))
- ;
- else if (io.mapTag("!Missed",
- OptDiag->getKind() == DK_OptimizationRemarkMissed))
- ;
- else if (io.mapTag("!Analysis",
- OptDiag->getKind() == DK_OptimizationRemarkAnalysis))
- ;
- else if (io.mapTag("!AnalysisFPCommute",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisFPCommute))
- ;
- else if (io.mapTag("!AnalysisAliasing",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisAliasing))
- ;
- else
- llvm_unreachable("todo");
-
- // These are read-only for now.
- DebugLoc DL = OptDiag->getDebugLoc();
- StringRef FN = GlobalValue::getRealLinkageName(
- OptDiag->getFunction().getName());
-
- StringRef PassName(OptDiag->PassName);
- io.mapRequired("Pass", PassName);
- io.mapRequired("Name", OptDiag->RemarkName);
- if (!io.outputting() || DL)
- io.mapOptional("DebugLoc", DL);
- io.mapRequired("Function", FN);
- io.mapOptional("Hotness", OptDiag->Hotness);
- io.mapOptional("Args", OptDiag->Args);
- }
-};
+ // These are read-only for now.
+ DiagnosticLocation DL = OptDiag->getLocation();
+ StringRef FN =
+ GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
-template <> struct MappingTraits<DebugLoc> {
- static void mapping(IO &io, DebugLoc &DL) {
+ StringRef PassName(OptDiag->PassName);
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", OptDiag->RemarkName);
+ if (!io.outputting() || DL.isValid())
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FN);
+ io.mapOptional("Hotness", OptDiag->Hotness);
+ io.mapOptional("Args", OptDiag->Args);
+}
+
+template <> struct MappingTraits<DiagnosticLocation> {
+ static void mapping(IO &io, DiagnosticLocation &DL) {
assert(io.outputting() && "input not yet implemented");
- auto *Scope = cast<DIScope>(DL.getScope());
- StringRef File = Scope->getFilename();
+ StringRef File = DL.getFilename();
unsigned Line = DL.getLine();
- unsigned Col = DL.getCol();
+ unsigned Col = DL.getColumn();
io.mapRequired("File", File);
io.mapRequired("Line", Line);
@@ -116,8 +134,8 @@ template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
assert(io.outputting() && "input not yet implemented");
io.mapRequired(A.Key.data(), A.Val);
- if (A.DLoc)
- io.mapOptional("DebugLoc", A.DLoc);
+ if (A.Loc.isValid())
+ io.mapOptional("DebugLoc", A.Loc);
}
};
@@ -127,18 +145,27 @@ template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
void OptimizationRemarkEmitter::computeHotness(
- DiagnosticInfoOptimizationBase &OptDiag) {
- Value *V = OptDiag.getCodeRegion();
+ DiagnosticInfoIROptimization &OptDiag) {
+ const Value *V = OptDiag.getCodeRegion();
if (V)
OptDiag.setHotness(computeHotness(V));
}
-void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
+void OptimizationRemarkEmitter::emit(
+ DiagnosticInfoOptimizationBase &OptDiagBase) {
+ auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase);
computeHotness(OptDiag);
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() <
+ F->getContext().getDiagnosticsHotnessThreshold()) {
+ return;
+ }
yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
if (Out) {
- auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag);
+ auto *P = const_cast<DiagnosticInfoOptimizationBase *>(&OptDiagBase);
*Out << P;
}
// FIXME: now that IsVerbose is part of DI, filtering for this will be moved
@@ -147,72 +174,6 @@ void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
F->getContext().diagnose(OptDiag);
}
-void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
- const DebugLoc &DLoc,
- const Value *V,
- const Twine &Msg) {
- LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
- Loop *L,
- const Twine &Msg) {
- emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg);
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg, bool IsVerbose) {
- LLVMContext &Ctx = F->getContext();
- if (!IsVerbose || shouldEmitVerbose())
- Ctx.diagnose(
- OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
- emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg,
- IsVerbose);
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg, bool IsVerbose) {
- LLVMContext &Ctx = F->getContext();
- if (!IsVerbose || shouldEmitVerbose())
- Ctx.diagnose(
- OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
- const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
- emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(),
- Msg, IsVerbose);
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg) {
- LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg,
- computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg) {
- LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg,
- computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
- const char *PassName, Loop *L, const Twine &Msg) {
- emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(),
- L->getHeader(), Msg);
-}
-
OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
: FunctionPass(ID) {
initializeOptimizationRemarkEmitterWrapperPassPass(
@@ -222,7 +183,7 @@ OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
BlockFrequencyInfo *BFI;
- if (Fn.getContext().getDiagnosticHotnessRequested())
+ if (Fn.getContext().getDiagnosticsHotnessRequested())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
else
BFI = nullptr;
@@ -244,7 +205,7 @@ OptimizationRemarkEmitterAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
- if (F.getContext().getDiagnosticHotnessRequested())
+ if (F.getContext().getDiagnosticsHotnessRequested())
BFI = &AM.getResult<BlockFrequencyAnalysis>(F);
else
BFI = nullptr;
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
index 0f0016f..a04c0ae 100644
--- a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
assert(II != IE && "Instruction not found?");
assert((Inst == A || Inst == B) && "Should find A or B");
LastInstFound = II;
- return Inst == A;
+ return Inst != B;
}
/// \brief Find out whether \p A dominates \p B, meaning whether \p A
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index 84ecd4a..682af4d 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// Simplify the GEP to handle 'gep x, 0' -> x etc.
if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(),
- GEPOps, DL, TLI, DT, AC)) {
+ GEPOps, {DL, TLI, DT, AC})) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -276,7 +276,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
index cb9438a..1caf151 100644
--- a/contrib/llvm/lib/Analysis/PostDominators.cpp
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -31,6 +31,15 @@ char PostDominatorTreeWrapperPass::ID = 0;
INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree",
"Post-Dominator Tree Construction", true, true)
+bool PostDominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<PostDominatorTreeAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) {
DT.recalculate(F);
return false;
diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 16d3614..12b86da 100644
--- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -12,9 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
@@ -55,22 +56,43 @@ static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
// The profile summary metadata may be attached either by the frontend or by
// any backend passes (IR level instrumentation, for example). This method
// checks if the Summary is null and if so checks if the summary metadata is now
-// available in the module and parses it to get the Summary object.
-void ProfileSummaryInfo::computeSummary() {
+// available in the module and parses it to get the Summary object. Returns true
+// if a valid Summary is available.
+bool ProfileSummaryInfo::computeSummary() {
if (Summary)
- return;
+ return true;
auto *SummaryMD = M.getProfileSummary();
if (!SummaryMD)
- return;
+ return false;
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
+ return true;
+}
+
+Optional<uint64_t>
+ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
+ BlockFrequencyInfo *BFI) {
+ if (!Inst)
+ return None;
+ assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ "We can only get profile count for call/invoke instruction.");
+ if (hasSampleProfile()) {
+ // In sample PGO mode, check if there is a profile metadata on the
+ // instruction. If it is present, determine hotness solely based on that,
+ // since the sampled entry count may not be accurate.
+ uint64_t TotalCount;
+ if (Inst->extractProfTotalWeight(TotalCount))
+ return TotalCount;
+ }
+ if (BFI)
+ return BFI->getBlockProfileCount(Inst->getParent());
+ return None;
}
/// Returns true if the function's entry is hot. If it returns false, it
/// either means it is not hot or it is unknown whether it is hot or not (for
/// example, no profile data is available).
bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
- computeSummary();
- if (!F || !Summary)
+ if (!F || !computeSummary())
return false;
auto FunctionCount = F->getEntryCount();
// FIXME: The heuristic used below for determining hotness is based on
@@ -79,17 +101,53 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
return FunctionCount && isHotCount(FunctionCount.getValue());
}
+/// Returns true if the function's entry or total call edge count is hot.
+/// If it returns false, it either means it is not hot or it is unknown
+/// whether it is hot or not (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) {
+ if (!F || !computeSummary())
+ return false;
+ if (auto FunctionCount = F->getEntryCount())
+ if (isHotCount(FunctionCount.getValue()))
+ return true;
+
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ return isHotCount(TotalCallCount);
+}
+
+/// Returns true if the function's entry and total call edge count is cold.
+/// If it returns false, it either means it is not cold or it is unknown
+/// whether it is cold or not (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) {
+ if (!F || !computeSummary())
+ return false;
+ if (auto FunctionCount = F->getEntryCount())
+ if (!isColdCount(FunctionCount.getValue()))
+ return false;
+
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ return isColdCount(TotalCallCount);
+}
+
/// Returns true if the function's entry is a cold. If it returns false, it
/// either means it is not cold or it is unknown whether it is cold or not (for
/// example, no profile data is available).
bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
- computeSummary();
if (!F)
return false;
- if (F->hasFnAttribute(Attribute::Cold)) {
+ if (F->hasFnAttribute(Attribute::Cold))
return true;
- }
- if (!Summary)
+ if (!computeSummary())
return false;
auto FunctionCount = F->getEntryCount();
// FIXME: The heuristic used below for determining coldness is based on
@@ -100,9 +158,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
/// Compute the hot and cold thresholds.
void ProfileSummaryInfo::computeThresholds() {
- if (!Summary)
- computeSummary();
- if (!Summary)
+ if (!computeSummary())
return;
auto &DetailedSummary = Summary->getDetailedSummary();
HotCountThreshold =
@@ -125,20 +181,25 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
auto Count = BFI->getBlockProfileCount(B);
- if (Count && isHotCount(*Count))
- return true;
- // Use extractProfTotalWeight to get BB count.
- // For Sample PGO, BFI may not provide accurate BB count due to errors
- // magnified during sample count propagation. This serves as a backup plan
- // to ensure all hot BB will not be missed.
- // The query currently has false positives as branch instruction cloning does
- // not update/scale branch weights. Unlike false negatives, this will not cause
- // performance problem.
- uint64_t TotalCount;
- if (B->getTerminator()->extractProfTotalWeight(TotalCount) &&
- isHotCount(TotalCount))
- return true;
- return false;
+ return Count && isHotCount(*Count);
+}
+
+bool ProfileSummaryInfo::isColdBB(const BasicBlock *B,
+ BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(B);
+ return Count && isColdCount(*Count);
+}
+
+bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
+ BlockFrequencyInfo *BFI) {
+ auto C = getProfileCount(CS.getInstruction(), BFI);
+ return C && isHotCount(*C);
+}
+
+bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
+ BlockFrequencyInfo *BFI) {
+ auto C = getProfileCount(CS.getInstruction(), BFI);
+ return C && isColdCount(*C);
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index 8c084dd..9004873 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -10,28 +10,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfoImpl.h"
-#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
#endif
+#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "region"
namespace llvm {
+
template class RegionBase<RegionTraits<Function>>;
template class RegionNodeBase<RegionTraits<Function>>;
template class RegionInfoBase<RegionTraits<Function>>;
-}
+
+} // end namespace llvm
STATISTIC(numRegions, "The # of regions");
STATISTIC(numSimpleRegions, "The # of simple regions");
@@ -44,7 +45,6 @@ VerifyRegionInfoX(
cl::location(RegionInfoBase<RegionTraits<Function>>::VerifyRegionInfo),
cl::desc("Verify region info (time consuming)"));
-
static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
cl::location(RegionInfo::printStyle),
cl::Hidden,
@@ -56,7 +56,6 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
clEnumValN(Region::PrintRN, "rn",
"print regions in detail with element_iterator")));
-
//===----------------------------------------------------------------------===//
// Region implementation
//
@@ -68,19 +67,23 @@ Region::Region(BasicBlock *Entry, BasicBlock *Exit,
}
-Region::~Region() { }
+Region::~Region() = default;
//===----------------------------------------------------------------------===//
// RegionInfo implementation
//
-RegionInfo::RegionInfo() :
- RegionInfoBase<RegionTraits<Function>>() {
+RegionInfo::RegionInfo() = default;
-}
-
-RegionInfo::~RegionInfo() {
+RegionInfo::~RegionInfo() = default;
+bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<RegionInfoAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
}
void RegionInfo::updateStatistics(Region *R) {
@@ -117,9 +120,7 @@ RegionInfoPass::RegionInfoPass() : FunctionPass(ID) {
initializeRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-RegionInfoPass::~RegionInfoPass() {
-
-}
+RegionInfoPass::~RegionInfoPass() = default;
bool RegionInfoPass::runOnFunction(Function &F) {
releaseMemory();
@@ -172,10 +173,12 @@ INITIALIZE_PASS_END(RegionInfoPass, "regions",
// the link time optimization.
namespace llvm {
+
FunctionPass *createRegionInfoPass() {
return new RegionInfoPass();
}
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// RegionInfoAnalysis implementation
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index 7358aa6..b38e622 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/OptBisect.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -206,6 +207,8 @@ public:
return false;
}
+
+ StringRef getPassName() const override { return "Print Region IR"; }
};
char PrintRegionPass::ID = 0;
@@ -278,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
return new PrintRegionPass(Banner, O);
}
+
+bool RegionPass::skipRegion(Region &R) const {
+ Function &F = *R.getEntry()->getParent();
+ if (!F.getContext().getOptBisect().shouldRunPass(this, R))
+ return true;
+
+ if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ // Report this only once per function.
+ if (R.getEntry() == &F.getEntryBlock())
+ DEBUG(dbgs() << "Skipping pass '" << getPassName()
+ << "' on function " << F.getName() << "\n");
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index 30a4e01..5986b8c 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -9,14 +9,14 @@
// Print out the region tree of a function using dotty/graphviz.
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index ed328f1..9539fd7 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -89,9 +89,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -125,18 +126,47 @@ static cl::opt<bool>
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
- cl::init(1000));
+ cl::init(32));
+
+static cl::opt<unsigned> AddOpsInlineThreshold(
+ "scev-addops-inline-threshold", cl::Hidden,
+ cl::desc("Threshold for inlining addition operands into a SCEV"),
+ cl::init(500));
static cl::opt<unsigned> MaxSCEVCompareDepth(
"scalar-evolution-max-scev-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
cl::init(32));
+static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth(
+ "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SCEV operations implication analysis"),
+ cl::init(2));
+
static cl::opt<unsigned> MaxValueCompareDepth(
"scalar-evolution-max-value-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive value complexity comparisons"),
cl::init(2));
+static cl::opt<unsigned>
+ MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive arithmetics"),
+ cl::init(32));
+
+static cl::opt<unsigned> MaxConstantEvolvingDepth(
+ "scalar-evolution-max-constant-evolving-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
+
+static cl::opt<unsigned>
+ MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SExt/ZExt"),
+ cl::init(8));
+
+static cl::opt<unsigned>
+ MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
+ cl::desc("Max coefficients in AddRec during evolving"),
+ cl::init(16));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -145,11 +175,12 @@ static cl::opt<unsigned> MaxValueCompareDepth(
// Implementation of the SCEV class.
//
-LLVM_DUMP_METHOD
-void SCEV::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void SCEV::print(raw_ostream &OS) const {
switch (static_cast<SCEVTypes>(getSCEVType())) {
@@ -300,7 +331,7 @@ bool SCEV::isOne() const {
bool SCEV::isAllOnesValue() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
- return SC->getValue()->isAllOnesValue();
+ return SC->getValue()->isMinusOne();
return false;
}
@@ -563,7 +594,7 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
static int CompareSCEVComplexity(
SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
- unsigned Depth = 0) {
+ DominatorTree &DT, unsigned Depth = 0) {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
return 0;
@@ -608,12 +639,19 @@ static int CompareSCEVComplexity(
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
- // Compare addrec loop depths.
+ // There is always a dominance between two recs that are used by one SCEV,
+ // so we can safely sort recs by loop header dominance. We require such
+ // order in getAddExpr.
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
if (LLoop != RLoop) {
- unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
+ const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
+ assert(LHead != RHead && "Two loops share the same header?");
+ if (DT.dominates(LHead, RHead))
+ return 1;
+ else
+ assert(DT.dominates(RHead, LHead) &&
+ "No dominance between recurrences used by one SCEV?");
+ return -1;
}
// Addrec complexity grows with operand count.
@@ -624,7 +662,7 @@ static int CompareSCEVComplexity(
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
- RA->getOperand(i), Depth + 1);
+ RA->getOperand(i), DT, Depth + 1);
if (X != 0)
return X;
}
@@ -648,7 +686,7 @@ static int CompareSCEVComplexity(
if (i >= RNumOps)
return 1;
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
- RC->getOperand(i), Depth + 1);
+ RC->getOperand(i), DT, Depth + 1);
if (X != 0)
return X;
}
@@ -662,10 +700,10 @@ static int CompareSCEVComplexity(
// Lexicographically compare udiv expressions.
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
- Depth + 1);
+ DT, Depth + 1);
if (X != 0)
return X;
- X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
+ X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT,
Depth + 1);
if (X == 0)
EqCacheSCEV.insert({LHS, RHS});
@@ -680,7 +718,7 @@ static int CompareSCEVComplexity(
// Compare cast expressions by operand.
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
- RC->getOperand(), Depth + 1);
+ RC->getOperand(), DT, Depth + 1);
if (X == 0)
EqCacheSCEV.insert({LHS, RHS});
return X;
@@ -703,7 +741,7 @@ static int CompareSCEVComplexity(
/// land in memory.
///
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
- LoopInfo *LI) {
+ LoopInfo *LI, DominatorTree &DT) {
if (Ops.size() < 2) return; // Noop
SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
@@ -711,15 +749,16 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
- if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
+ if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0)
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
std::stable_sort(Ops.begin(), Ops.end(),
- [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
- return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
+ [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) {
+ return
+ CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0;
});
// Now that we are sorted by complexity, group elements of the same
@@ -1073,7 +1112,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
APInt Mult(W, i);
unsigned TwoFactors = Mult.countTrailingZeros();
T += TwoFactors;
- Mult = Mult.lshr(TwoFactors);
+ Mult.lshrInPlace(TwoFactors);
OddFactorial *= Mult;
}
@@ -1230,12 +1269,12 @@ static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
if (SE->isKnownPositive(Step)) {
*Pred = ICmpInst::ICMP_SLT;
return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMax());
+ SE->getSignedRangeMax(Step));
}
if (SE->isKnownNegative(Step)) {
*Pred = ICmpInst::ICMP_SGT;
return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMin());
+ SE->getSignedRangeMin(Step));
}
return nullptr;
}
@@ -1250,13 +1289,14 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
*Pred = ICmpInst::ICMP_ULT;
return SE->getConstant(APInt::getMinValue(BitWidth) -
- SE->getUnsignedRange(Step).getUnsignedMax());
+ SE->getUnsignedRangeMax(Step));
}
namespace {
struct ExtendOpTraitsBase {
- typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
+ unsigned);
};
// Used to make code generic over signed and unsigned overflow.
@@ -1314,7 +1354,7 @@ const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, unsigned Depth) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
@@ -1361,9 +1401,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
- SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
- (SE->*GetExtendExpr)(Step, WideTy));
- if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
+ (SE->*GetExtendExpr)(Step, WideTy, Depth));
+ if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
@@ -1388,15 +1428,17 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE,
+ unsigned Depth) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
- const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
if (!PreStart)
- return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);
- return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
- (SE->*GetExtendExpr)(PreStart, Ty));
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
+ Depth),
+ (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
@@ -1476,8 +1518,8 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
return false;
}
-const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
- Type *Ty) {
+const SCEV *
+ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1491,7 +1533,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExpr(SZ->getOperand(), Ty);
+ return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1501,6 +1543,12 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ if (Depth > MaxExtDepth) {
+ SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+ }
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
@@ -1535,8 +1583,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1560,37 +1608,49 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
- const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
+ SCEV::FlagAnyWrap,
+ Depth + 1),
+ WideTy, Depth + 1);
+ const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
- getZeroExtendExpr(CastedMaxBECount, WideTy);
+ getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getZeroExtendExpr(Step, WideTy)));
+ getZeroExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getSignExtendExpr(Step, WideTy)));
+ getSignExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1611,7 +1671,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
- getUnsignedRange(Step).getUnsignedMax());
+ getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
@@ -1621,12 +1681,14 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
- getSignedRange(Step).getSignedMin());
+ getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
@@ -1637,8 +1699,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1646,8 +1710,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
@@ -1658,8 +1722,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getZeroExtendExpr(Op, Ty));
- return getAddExpr(Ops, SCEV::FlagNUW);
+ Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
+ return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
}
@@ -1672,8 +1736,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
-const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
- Type *Ty) {
+const SCEV *
+ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
@@ -1687,11 +1751,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getSignExtendExpr(SS->getOperand(), Ty);
+ return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getZeroExtendExpr(SZ->getOperand(), Ty);
+ return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
@@ -1701,6 +1765,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ // Limit recursion depth.
+ if (Depth > MaxExtDepth) {
+ SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
+ Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+ }
// sext(trunc(x)) --> sext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
@@ -1726,8 +1797,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
C2.ugt(C1) && C2.isPowerOf2())
- return getAddExpr(getSignExtendExpr(SC1, Ty),
- getSignExtendExpr(SMul, Ty));
+ return getAddExpr(getSignExtendExpr(SC1, Ty, Depth + 1),
+ getSignExtendExpr(SMul, Ty, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
}
@@ -1738,8 +1810,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
- Ops.push_back(getSignExtendExpr(Op, Ty));
- return getAddExpr(Ops, SCEV::FlagNSW);
+ Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
+ return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
}
// If the input value is a chrec scev, and we can prove that the value
@@ -1762,8 +1834,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1787,29 +1859,39 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
- const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
+ SCEV::FlagAnyWrap,
+ Depth + 1),
+ WideTy, Depth + 1);
+ const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
- getZeroExtendExpr(CastedMaxBECount, WideTy);
+ getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getSignExtendExpr(Step, WideTy)));
+ getSignExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
- getZeroExtendExpr(Step, WideTy)));
+ getZeroExtendExpr(Step, WideTy, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// If AR wraps around then
//
@@ -1823,8 +1905,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Return the expression with the addrec on the outside.
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1),
+ getZeroExtendExpr(Step, Ty, Depth + 1), L,
+ AR->getNoWrapFlags());
}
}
}
@@ -1855,8 +1939,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
@@ -1870,25 +1954,26 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const APInt &C2 = SC2->getAPInt();
if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
C2.isPowerOf2()) {
- Start = getSignExtendExpr(Start, Ty);
+ Start = getSignExtendExpr(Start, Ty, Depth + 1);
const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L,
AR->getNoWrapFlags());
- return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
+ return getAddExpr(Start, getSignExtendExpr(NewAR, Ty, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
- getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
+ getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// If the input value is provably positive and we could not simplify
// away the sext build a zext instead.
if (isKnownNonNegative(Op))
- return getZeroExtendExpr(Op, Ty);
+ return getZeroExtendExpr(Op, Ty, Depth + 1);
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
@@ -2093,9 +2178,66 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
return Flags;
}
+bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
+ if (!isLoopInvariant(S, L))
+ return false;
+ // If a value depends on a SCEVUnknown which is defined after the loop, we
+ // conservatively assume that we cannot calculate it at the loop's entry.
+ struct FindDominatedSCEVUnknown {
+ bool Found = false;
+ const Loop *L;
+ DominatorTree &DT;
+ LoopInfo &LI;
+
+ FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI)
+ : L(L), DT(DT), LI(LI) {}
+
+ bool checkSCEVUnknown(const SCEVUnknown *SU) {
+ if (auto *I = dyn_cast<Instruction>(SU->getValue())) {
+ if (DT.dominates(L->getHeader(), I->getParent()))
+ Found = true;
+ else
+ assert(DT.dominates(I->getParent(), L->getHeader()) &&
+ "No dominance relationship between SCEV and loop?");
+ }
+ return false;
+ }
+
+ bool follow(const SCEV *S) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return false;
+ case scAddRecExpr:
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUDivExpr:
+ return true;
+ case scUnknown:
+ return checkSCEVUnknown(cast<SCEVUnknown>(S));
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ return false;
+ }
+
+ bool isDone() { return Found; }
+ };
+
+ FindDominatedSCEVUnknown FSU(L, DT, LI);
+ SCEVTraversal<FindDominatedSCEVUnknown> ST(FSU);
+ ST.visitAll(S);
+ return !FSU.Found;
+}
+
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags) {
+ SCEV::NoWrapFlags Flags,
+ unsigned Depth) {
assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty add!");
@@ -2108,7 +2250,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
@@ -2134,6 +2276,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 1) return Ops[0];
}
+ // Limit recursion calls depth.
+ if (Depth > MaxArithDepth)
+ return getOrCreateAddExpr(Ops, Flags);
+
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them together into an multiply expression. Since we
// sorted the list, these values are required to be adjacent.
@@ -2147,7 +2293,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
++Count;
// Merge the values into a multiply.
const SCEV *Scale = getConstant(Ty, Count);
- const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == Count)
return Mul;
Ops[i] = Mul;
@@ -2197,7 +2343,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
}
if (Ok)
- LargeOps.push_back(getMulExpr(LargeMulOps));
+ LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1));
} else {
Ok = false;
break;
@@ -2205,7 +2351,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
if (Ok) {
// Evaluate the expression in the larger type.
- const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, DstType);
@@ -2220,6 +2366,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Idx < Ops.size()) {
bool DeletedAdd = false;
while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ if (Ops.size() > AddOpsInlineThreshold ||
+ Add->getNumOperands() > AddOpsInlineThreshold)
+ break;
// If we have an add, expand the add operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
@@ -2231,7 +2380,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// and they are not necessarily sorted. Recurse to resort and resimplify
// any operands we just acquired.
if (DeletedAdd)
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Skip over the add expression until we get to a multiply.
@@ -2266,13 +2415,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops.push_back(getConstant(AccumulatedConstant));
for (auto &MulOp : MulOpLists)
if (MulOp.first != 0)
- Ops.push_back(getMulExpr(getConstant(MulOp.first),
- getAddExpr(MulOp.second)));
+ Ops.push_back(getMulExpr(
+ getConstant(MulOp.first),
+ getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1));
if (Ops.empty())
return getZero(Ty);
if (Ops.size() == 1)
return Ops[0];
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
@@ -2295,11 +2446,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
- InnerMul = getMulExpr(MulOps);
+ InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
- const SCEV *One = getOne(Ty);
- const SCEV *AddOne = getAddExpr(One, InnerMul);
- const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+ SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
+ const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV,
+ SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
@@ -2309,7 +2461,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops.erase(Ops.begin()+AddOp-1);
}
Ops.push_back(OuterMul);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Check this multiply against other multiplies being added together.
@@ -2328,22 +2480,25 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
- InnerMul1 = getMulExpr(MulOps);
+ InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_begin()+OMulOp);
MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
- InnerMul2 = getMulExpr(MulOps);
+ InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
- const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
- const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+ SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
+ const SCEV *InnerMulSum =
+ getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum,
+ SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
Ops.push_back(OuterMul);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
}
@@ -2363,7 +2518,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -2379,7 +2534,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// This follows from the fact that the no-wrap flags on the outer add
// expression are applicable on the 0th iteration, when the add recurrence
// will be equal to its start value.
- AddRecOps[0] = getAddExpr(LIOps, Flags);
+ AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
@@ -2396,7 +2551,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops[i] = NewRec;
break;
}
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Okay, if there weren't any loop invariants to be folded, check to see if
@@ -2404,31 +2559,40 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
+ ++OtherIdx) {
+ // We expect the AddRecExpr's to be sorted in reverse dominance order,
+ // so that the 1st found AddRecExpr is dominated by all others.
+ assert(DT.dominates(
+ cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
+ AddRec->getLoop()->getHeader()) &&
+ "AddRecExprs are not sorted in reverse dominance order?");
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
AddRec->op_end());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
- if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
- if (OtherAddRec->getLoop() == AddRecLoop) {
- for (unsigned i = 0, e = OtherAddRec->getNumOperands();
- i != e; ++i) {
- if (i >= AddRecOps.size()) {
- AddRecOps.append(OtherAddRec->op_begin()+i,
- OtherAddRec->op_end());
- break;
- }
- AddRecOps[i] = getAddExpr(AddRecOps[i],
- OtherAddRec->getOperand(i));
+ ++OtherIdx) {
+ const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
}
- Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ SmallVector<const SCEV *, 2> TwoOps = {
+ AddRecOps[i], OtherAddRec->getOperand(i)};
+ AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
}
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ }
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
+ }
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
@@ -2436,17 +2600,44 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, it looks like we really DO need an add expr. Check to see if we
// already have one, otherwise create a new one.
+ return getOrCreateAddExpr(Ops, Flags);
+}
+
+const SCEV *
+ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = nullptr;
SCEVAddExpr *S =
- static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
+ S = new (SCEVAllocator)
+ SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
+ UniqueSCEVs.InsertNode(S, IP);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *
+ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scMulExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ SCEVMulExpr *S =
+ static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
}
@@ -2506,7 +2697,8 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) {
/// Get a canonical multiply expression, or something simpler if possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags) {
+ SCEV::NoWrapFlags Flags,
+ unsigned Depth) {
assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty mul!");
@@ -2519,10 +2711,14 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
+ // Limit recursion calls depth.
+ if (Depth > MaxArithDepth)
+ return getOrCreateMulExpr(Ops, Flags);
+
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
@@ -2534,8 +2730,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// apply this transformation as well.
if (Add->getNumOperands() == 2)
if (containsConstantSomewhere(Add))
- return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
- getMulExpr(LHSC, Add->getOperand(1)));
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
+ SCEV::FlagAnyWrap, Depth + 1),
+ getMulExpr(LHSC, Add->getOperand(1),
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
@@ -2549,7 +2748,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// If we are left with a constant one being multiplied, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
+ if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) {
Ops.erase(Ops.begin());
--Idx;
} else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
@@ -2563,17 +2762,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
for (const SCEV *AddOp : Add->operands()) {
- const SCEV *Mul = getMulExpr(Ops[0], AddOp);
+ const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
+ Depth + 1);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
- return getAddExpr(NewOps);
+ return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
} else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *AddRecOp : AddRec->operands())
- Operands.push_back(getMulExpr(Ops[0], AddRecOp));
+ Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
+ Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
@@ -2595,18 +2796,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
if (Ops.size() > MulOpsInlineThreshold)
break;
- // If we have an mul, expand the mul operands onto the end of the operands
- // list.
+ // If we have an mul, expand the mul operands onto the end of the
+ // operands list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Mul->op_begin(), Mul->op_end());
DeletedMul = true;
}
- // If we deleted at least one mul, we added operands to the end of the list,
- // and they are not necessarily sorted. Recurse to resort and resimplify
- // any operands we just acquired.
+ // If we deleted at least one mul, we added operands to the end of the
+ // list, and they are not necessarily sorted. Recurse to resort and
+ // resimplify any operands we just acquired.
if (DeletedMul)
- return getMulExpr(Ops);
+ return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// If there are any add recurrences in the operands list, see if any other
@@ -2617,13 +2818,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
- // Scan all of the other operands to this mul and add them to the vector if
- // they are loop invariant w.r.t. the recurrence.
+ // Scan all of the other operands to this mul and add them to the vector
+ // if they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -2634,9 +2835,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
- const SCEV *Scale = getMulExpr(LIOps);
+ const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
- NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
+ SCEV::FlagAnyWrap, Depth + 1));
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
@@ -2655,12 +2857,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops[i] = NewRec;
break;
}
- return getMulExpr(Ops);
+ return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
- // Okay, if there weren't any loop invariants to be folded, check to see if
- // there are multiple AddRec's with the same loop induction variable being
- // multiplied together. If so, we can fold them.
+ // Okay, if there weren't any loop invariants to be folded, check to see
+ // if there are multiple AddRec's with the same loop induction variable
+ // being multiplied together. If so, we can fold them.
// {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
// = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
@@ -2681,6 +2883,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
+ // Limit max number of arguments to avoid creation of unreasonably big
+ // SCEVAddRecs with very complex operands.
+ if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
+ MaxAddRecSize)
+ continue;
+
bool Overflow = false;
Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
@@ -2702,7 +2910,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2,
+ SCEV::FlagAnyWrap, Depth + 1),
+ SCEV::FlagAnyWrap, Depth + 1);
}
}
AddRecOps.push_back(Term);
@@ -2720,7 +2930,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
}
}
if (OpsModified)
- return getMulExpr(Ops);
+ return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
@@ -2728,22 +2938,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, it looks like we really DO need an mul expr. Check to see if we
// already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scMulExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
- void *IP = nullptr;
- SCEVMulExpr *S =
- static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- if (!S) {
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
- UniqueSCEVs.InsertNode(S, IP);
- }
- S->setNoWrapFlags(Flags);
- return S;
+ return getOrCreateMulExpr(Ops, Flags);
}
/// Get a canonical unsigned division expression, or something simpler if
@@ -2755,7 +2950,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
"SCEVUDivExpr operand types don't match!");
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
- if (RHSC->getValue()->equalsInt(1))
+ if (RHSC->getValue()->isOne())
return LHS; // X udiv 1 --> x
// If the denominator is zero, the result of the udiv is undefined. Don't
// try to analyze it, because the resolution chosen here may differ from
@@ -2875,7 +3070,7 @@ static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
else if (ABW < BBW)
A = A.zext(BBW);
- return APIntOps::GreatestCommonDivisor(A, B);
+ return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B));
}
/// Get a canonical unsigned division expression, or something simpler if
@@ -2889,7 +3084,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
// end of this file for inspiration.
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
- if (!Mul)
+ if (!Mul || !Mul->hasNoUnsignedWrap())
return getUDivExpr(LHS, RHS);
if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
@@ -3116,7 +3311,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3217,7 +3412,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3385,6 +3580,10 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
return getDataLayout().getIntPtrType(Ty);
}
+Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
+ return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
+}
+
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
@@ -3542,7 +3741,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
- SCEV::NoWrapFlags Flags) {
+ SCEV::NoWrapFlags Flags,
+ unsigned Depth) {
// Fast path: X - X --> 0.
if (LHS == RHS)
return getZero(LHS->getType());
@@ -3551,7 +3751,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
// makes it so that we cannot make much use of NUW.
auto AddFlags = SCEV::FlagAnyWrap;
const bool RHSIsNotMinSigned =
- !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ !getSignedRangeMin(RHS).isMinSignedValue();
if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
// Let M be the minimum representable signed value. Then (-1)*RHS
// signed-wraps if and only if RHS is M. That can happen even for
@@ -3576,7 +3776,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
// larger scope than intended.
auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
const SCEV *
@@ -3770,7 +3970,7 @@ public:
: SCEVRewriteVisitor(SE), L(L), Valid(true) {}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ if (!SE.isLoopInvariant(Expr, L))
Valid = false;
return Expr;
}
@@ -3804,7 +4004,7 @@ public:
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
// Only allow AddRecExprs for this loop.
- if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ if (!SE.isLoopInvariant(Expr, L))
Valid = false;
return Expr;
}
@@ -3909,9 +4109,9 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
case Instruction::Xor:
if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
- // If the RHS of the xor is a signbit, then this is just an add.
- // Instcombine turns add of signbit into xor as a strength reduction step.
- if (RHSC->getValue().isSignBit())
+ // If the RHS of the xor is a signmask, then this is just an add.
+ // Instcombine turns add of signmask into xor as a strength reduction step.
+ if (RHSC->getValue().isSignMask())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
return BinaryOp(Op);
@@ -3984,6 +4184,369 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
return None;
}
+/// Helper function to createAddRecFromPHIWithCasts. We have a phi
+/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
+/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
+/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
+/// follows one of the following patterns:
+/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+/// If the SCEV expression of \p Op conforms with one of the expected patterns
+/// we return the type of the truncation operation, and indicate whether the
+/// truncated type should be treated as signed/unsigned by setting
+/// \p Signed to true/false, respectively.
+static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
+ bool &Signed, ScalarEvolution &SE) {
+
+ // The case where Op == SymbolicPHI (that is, with no type conversions on
+ // the way) is handled by the regular add recurrence creating logic and
+ // would have already been triggered in createAddRecForPHI. Reaching it here
+ // means that createAddRecFromPHI had failed for this PHI before (e.g.,
+ // because one of the other operands of the SCEVAddExpr updating this PHI is
+ // not invariant).
+ //
+ // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
+ // this case predicates that allow us to prove that Op == SymbolicPHI will
+ // be added.
+ if (Op == SymbolicPHI)
+ return nullptr;
+
+ unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
+ unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
+ if (SourceBits != NewBits)
+ return nullptr;
+
+ const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
+ if (!SExt && !ZExt)
+ return nullptr;
+ const SCEVTruncateExpr *Trunc =
+ SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
+ : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
+ if (!Trunc)
+ return nullptr;
+ const SCEV *X = Trunc->getOperand();
+ if (X != SymbolicPHI)
+ return nullptr;
+ Signed = SExt ? true : false;
+ return Trunc->getType();
+}
+
+static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
+ if (!PN->getType()->isIntegerTy())
+ return nullptr;
+ const Loop *L = LI.getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent())
+ return nullptr;
+ return L;
+}
+
+// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
+// computation that updates the phi follows the following pattern:
+// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+// which correspond to a phi->trunc->sext/zext->add->phi update chain.
+// If so, try to see if it can be rewritten as an AddRecExpr under some
+// Predicates. If successful, return them as a pair. Also cache the results
+// of the analysis.
+//
+// Example usage scenario:
+// Say the Rewriter is called for the following SCEV:
+// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
+// where:
+// %X = phi i64 (%Start, %BEValue)
+// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
+// and call this function with %SymbolicPHI = %X.
+//
+// The analysis will find that the value coming around the backedge has
+// the following SCEV:
+// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
+// Upon concluding that this matches the desired pattern, the function
+// will return the pair {NewAddRec, SmallPredsVec} where:
+// NewAddRec = {%Start,+,%Step}
+// SmallPredsVec = {P1, P2, P3} as follows:
+// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
+// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
+// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
+// The returned pair means that SymbolicPHI can be rewritten into NewAddRec
+// under the predicates {P1,P2,P3}.
+// This predicated rewrite will be cached in PredicatedSCEVRewrites:
+//   PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
+//
+// TODO's:
+//
+// 1) Extend the Induction descriptor to also support inductions that involve
+// casts: When needed (namely, when we are called in the context of the
+// vectorizer induction analysis), a Set of cast instructions will be
+// populated by this method, and provided back to isInductionPHI. This is
+// needed to allow the vectorizer to properly record them to be ignored by
+// the cost model and to avoid vectorizing them (otherwise these casts,
+// which are redundant under the runtime overflow checks, will be
+// vectorized, which can be costly).
+//
+// 2) Support additional induction/PHISCEV patterns: We also want to support
+// inductions where the sext-trunc / zext-trunc operations (partly) occur
+// after the induction update operation (the induction increment):
+//
+// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
+// which correspond to a phi->add->trunc->sext/zext->phi update chain.
+//
+// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
+// which correspond to a phi->trunc->add->sext/zext->phi update chain.
+//
+// 3) Outline common code with createAddRecFromPHI to avoid duplication.
+//
+Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
+ SmallVector<const SCEVPredicate *, 3> Predicates;
+
+ // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
+ // return an AddRec expression under some predicate.
+
+ auto *PN = cast<PHINode>(SymbolicPHI->getValue());
+ const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
+ assert (L && "Expecting an integer loop header phi");
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = nullptr;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = nullptr;
+ break;
+ }
+ }
+ if (!BEValueV || !StartValueV)
+ return None;
+
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, possibly with casts that we can ignore under
+ // an appropriate runtime guard, then we found a simple induction variable!
+ const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
+ if (!Add)
+ return None;
+
+ // If there is a single occurrence of the symbolic value, possibly
+ // casted, replace it with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ Type *TruncTy = nullptr;
+ bool Signed;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if ((TruncTy =
+ isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex == Add->getNumOperands())
+ return None;
+
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // The runtime checks will not be valid if the step amount is
+ // varying inside the loop.
+ if (!isLoopInvariant(Accum, L))
+ return None;
+
+
+ // *** Part2: Create the predicates
+
+ // Analysis was successful: we have a phi-with-cast pattern for which we
+ // can return an AddRec expression under the following predicates:
+ //
+ // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
+ // fits within the truncated type (does not overflow) for i = 0 to n-1.
+ // P2: An Equal predicate that guarantees that
+ // Start = (Ext ix (Trunc iy (Start) to ix) to iy)
+ // P3: An Equal predicate that guarantees that
+ // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
+ //
+ // As we next prove, the above predicates guarantee that:
+ // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
+ //
+ //
+ // More formally, we want to prove that:
+ // Expr(i+1) = Start + (i+1) * Accum
+ // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
+ //
+ // Given that:
+ // 1) Expr(0) = Start
+ // 2) Expr(1) = Start + Accum
+ // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
+ // 3) Induction hypothesis (step i):
+ // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
+ //
+ // Proof:
+ // Expr(i+1) =
+ // = Start + (i+1)*Accum
+ // = (Start + i*Accum) + Accum
+ // = Expr(i) + Accum
+ // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
+ // :: from step i
+ //
+ // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
+ //
+ // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
+ // + (Ext ix (Trunc iy (Accum) to ix) to iy)
+ // + Accum :: from P3
+ //
+ // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
+ // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
+ //
+ // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
+ // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
+ //
+ // By induction, the same applies to all iterations 1<=i<n:
+ //
+
+ // Create a truncated addrec for which we will add a no overflow check (P1).
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV =
+ getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
+ getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
+ const auto *AR = cast<SCEVAddRecExpr>(PHISCEV);
+
+ SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
+ Signed ? SCEVWrapPredicate::IncrementNSSW
+ : SCEVWrapPredicate::IncrementNUSW;
+ const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
+ Predicates.push_back(AddRecPred);
+
+ // Create the Equal Predicates P2,P3:
+ auto AppendPredicate = [&](const SCEV *Expr) -> void {
+ assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
+ const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
+ const SCEV *ExtendedExpr =
+ Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType())
+ : getZeroExtendExpr(TruncatedExpr, Expr->getType());
+ if (Expr != ExtendedExpr &&
+ !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
+ const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
+ DEBUG (dbgs() << "Added Predicate: " << *Pred);
+ Predicates.push_back(Pred);
+ }
+ };
+
+ AppendPredicate(StartVal);
+ AppendPredicate(Accum);
+
+ // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
+ // which the casts had been folded away. The caller can rewrite SymbolicPHI
+ // into NewAR if it will also add the runtime overflow checks specified in
+ // Predicates.
+ auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
+
+ std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
+ std::make_pair(NewAR, Predicates);
+  // Remember the result of the analysis for this SCEV at this location.
+ PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
+ return PredRewrite;
+}
+
+Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
+
+ auto *PN = cast<PHINode>(SymbolicPHI->getValue());
+ const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
+ if (!L)
+ return None;
+
+ // Check to see if we already analyzed this PHI.
+ auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
+ if (I != PredicatedSCEVRewrites.end()) {
+ std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
+ I->second;
+ // Analysis was done before and failed to create an AddRec:
+ if (Rewrite.first == SymbolicPHI)
+ return None;
+ // Analysis was done before and succeeded to create an AddRec under
+ // a predicate:
+ assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
+ assert(!(Rewrite.second).empty() && "Expected to find Predicates");
+ return Rewrite;
+ }
+
+ Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);
+
+ // Record in the cache that the analysis failed
+ if (!Rewrite) {
+ SmallVector<const SCEVPredicate *, 3> Predicates;
+ PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
+ return None;
+ }
+
+ return Rewrite;
+}
+
+/// A helper function for createAddRecFromPHI to handle simple cases.
+///
+/// This function tries to find an AddRec expression for the simplest (yet most
+/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
+/// If it fails, createAddRecFromPHI will use a more general, but slow,
+/// technique for finding the AddRec expression.
+const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
+ Value *BEValueV,
+ Value *StartValueV) {
+ const Loop *L = LI.getLoopFor(PN->getParent());
+ assert(L && L->getHeader() == PN->getParent());
+ assert(BEValueV && StartValueV);
+
+ auto BO = MatchBinaryOp(BEValueV, DT);
+ if (!BO)
+ return nullptr;
+
+ if (BO->Opcode != Instruction::Add)
+ return nullptr;
+
+ const SCEV *Accum = nullptr;
+ if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
+ Accum = getSCEV(BO->RHS);
+ else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
+ Accum = getSCEV(BO->LHS);
+
+ if (!Accum)
+ return nullptr;
+
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+ if (BO->IsNUW)
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (BO->IsNSW)
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+
+ // We can add Flags to the post-inc expression only if we
+ // know that it is *undefined behavior* for BEValueV to
+ // overflow.
+ if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
+ if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+
+ return PHISCEV;
+}
+
const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
const Loop *L = LI.getLoopFor(PN->getParent());
if (!L || L->getHeader() != PN->getParent())
@@ -4009,127 +4572,134 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
break;
}
}
- if (BEValueV && StartValueV) {
- // While we are analyzing this PHI node, handle its value symbolically.
- const SCEV *SymbolicName = getUnknown(PN);
- assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
- "PHI node already processed?");
- ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+ if (!BEValueV || !StartValueV)
+ return nullptr;
- // Using this symbolic name for the PHI, analyze the value coming around
- // the back-edge.
- const SCEV *BEValue = getSCEV(BEValueV);
+ assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
+ "PHI node already processed?");
- // NOTE: If BEValue is loop invariant, we know that the PHI node just
- // has a special value for the first iteration of the loop.
+  // First, try to find AddRec expression without creating a fictitious symbolic
+ // value for PN.
+ if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
+ return S;
+
+ // Handle PHI node value symbolically.
+ const SCEV *SymbolicName = getUnknown(PN);
+ ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+
+ // Using this symbolic name for the PHI, analyze the value coming around
+ // the back-edge.
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // NOTE: If BEValue is loop invariant, we know that the PHI node just
+ // has a special value for the first iteration of the loop.
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, then we found a simple induction variable!
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+ // If there is a single occurrence of the symbolic value, replace it
+ // with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (Add->getOperand(i) == SymbolicName)
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
- // If the value coming around the backedge is an add with the symbolic
- // value we just inserted, then we found a simple induction variable!
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
- // If there is a single occurrence of the symbolic value, replace it
- // with a recurrence.
- unsigned FoundIndex = Add->getNumOperands();
+ if (FoundIndex != Add->getNumOperands()) {
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if (Add->getOperand(i) == SymbolicName)
- if (FoundIndex == e) {
- FoundIndex = i;
- break;
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // This is not a valid addrec if the step amount is varying each
+ // loop iteration, but is not itself an addrec in this loop.
+ if (isLoopInvariant(Accum, L) ||
+ (isa<SCEVAddRecExpr>(Accum) &&
+ cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+ if (auto BO = MatchBinaryOp(BEValueV, DT)) {
+ if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
+ if (BO->IsNUW)
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (BO->IsNSW)
+ Flags = setFlags(Flags, SCEV::FlagNSW);
}
-
- if (FoundIndex != Add->getNumOperands()) {
- // Create an add with everything but the specified operand.
- SmallVector<const SCEV *, 8> Ops;
- for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
- if (i != FoundIndex)
- Ops.push_back(Add->getOperand(i));
- const SCEV *Accum = getAddExpr(Ops);
-
- // This is not a valid addrec if the step amount is varying each
- // loop iteration, but is not itself an addrec in this loop.
- if (isLoopInvariant(Accum, L) ||
- (isa<SCEVAddRecExpr>(Accum) &&
- cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
-
- if (auto BO = MatchBinaryOp(BEValueV, DT)) {
- if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
- if (BO->IsNUW)
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (BO->IsNSW)
- Flags = setFlags(Flags, SCEV::FlagNSW);
- }
- } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
- // If the increment is an inbounds GEP, then we know the address
- // space cannot be wrapped around. We cannot make any guarantee
- // about signed or unsigned overflow because pointers are
- // unsigned but we may have a negative index from the base
- // pointer. We can guarantee that no unsigned wrap occurs if the
- // indices form a positive value.
- if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
- Flags = setFlags(Flags, SCEV::FlagNW);
-
- const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
- if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
- Flags = setFlags(Flags, SCEV::FlagNUW);
- }
-
- // We cannot transfer nuw and nsw flags from subtraction
- // operations -- sub nuw X, Y is not the same as add nuw X, -Y
- // for instance.
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is an inbounds GEP, then we know the address
+ // space cannot be wrapped around. We cannot make any guarantee
+ // about signed or unsigned overflow because pointers are
+ // unsigned but we may have a negative index from the base
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
+ Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
}
- const SCEV *StartVal = getSCEV(StartValueV);
- const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+ // We cannot transfer nuw and nsw flags from subtraction
+ // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+ // for instance.
+ }
- // Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and purge all of the
- // entries for the scalars that use the symbolic expression.
- forgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
- // We can add Flags to the post-inc expression only if we
- // know that it us *undefined behavior* for BEValueV to
- // overflow.
- if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
- if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ forgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
- return PHISCEV;
- }
+ // We can add Flags to the post-inc expression only if we
+ // know that it is *undefined behavior* for BEValueV to
+ // overflow.
+ if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
+ if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+
+ return PHISCEV;
}
- } else {
- // Otherwise, this could be a loop like this:
- // i = 0; for (j = 1; ..; ++j) { .... i = j; }
- // In this case, j = {1,+,1} and BEValue is j.
- // Because the other in-value of i (0) fits the evolution of BEValue
- // i really is an addrec evolution.
- //
- // We can generalize this saying that i is the shifted value of BEValue
- // by one iteration:
- // PHI(f(0), f({1,+,1})) --> f({0,+,1})
- const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
- const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
- if (Shifted != getCouldNotCompute() &&
- Start != getCouldNotCompute()) {
- const SCEV *StartVal = getSCEV(StartValueV);
- if (Start == StartVal) {
- // Okay, for the entire analysis of this edge we assumed the PHI
- // to be symbolic. We now need to go back and purge all of the
- // entries for the scalars that use the symbolic expression.
- forgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
- return Shifted;
- }
+ }
+ } else {
+ // Otherwise, this could be a loop like this:
+ // i = 0; for (j = 1; ..; ++j) { .... i = j; }
+ // In this case, j = {1,+,1} and BEValue is j.
+ // Because the other in-value of i (0) fits the evolution of BEValue
+ // i really is an addrec evolution.
+ //
+ // We can generalize this saying that i is the shifted value of BEValue
+ // by one iteration:
+ // PHI(f(0), f({1,+,1})) --> f({0,+,1})
+ const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
+ const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
+ if (Shifted != getCouldNotCompute() &&
+ Start != getCouldNotCompute()) {
+ const SCEV *StartVal = getSCEV(StartValueV);
+ if (Start == StartVal) {
+ // Okay, for the entire analysis of this edge we assumed the PHI
+ // to be symbolic. We now need to go back and purge all of the
+ // entries for the scalars that use the symbolic expression.
+ forgetSymbolicName(PN, SymbolicName);
+ ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+ return Shifted;
}
}
-
- // Remove the temporary PHI node SCEV that has been inserted while intending
- // to create an AddRecExpr for this PHI node. We can not keep this temporary
- // as it will prevent later (possibly simpler) SCEV expressions to be added
- // to the ValueExprMap.
- eraseValueFromMap(PN);
}
+ // Remove the temporary PHI node SCEV that has been inserted while intending
+ // to create an AddRecExpr for this PHI node. We can not keep this temporary
+ // as it will prevent later (possibly simpler) SCEV expressions to be added
+ // to the ValueExprMap.
+ eraseValueFromMap(PN);
+
return nullptr;
}
@@ -4289,7 +4859,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
+ if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
if (LI.replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -4409,8 +4979,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
return getGEPExpr(GEP, IndexExprs);
}
-uint32_t
-ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getAPInt().countTrailingZeros();
@@ -4420,14 +4989,16 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
- return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
- getTypeSizeInBits(E->getType()) : OpRes;
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType())
+ ? getTypeSizeInBits(E->getType())
+ : OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
- return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
- getTypeSizeInBits(E->getType()) : OpRes;
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType())
+ ? getTypeSizeInBits(E->getType())
+ : OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
@@ -4444,8 +5015,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
- SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
- BitWidth);
+ SumOpRes =
+ std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
return SumOpRes;
}
@@ -4475,17 +5046,25 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- unsigned BitWidth = getTypeSizeInBits(U->getType());
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
- nullptr, &DT);
- return Zeros.countTrailingOnes();
+ KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT);
+ return Known.countMinTrailingZeros();
}
// SCEVUDivExpr
return 0;
}
+uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ auto I = MinTrailingZerosCache.find(S);
+ if (I != MinTrailingZerosCache.end())
+ return I->second;
+
+ uint32_t Result = GetMinTrailingZerosImpl(S);
+ auto InsertPair = MinTrailingZerosCache.insert({S, Result});
+ assert(InsertPair.second && "Should insert a new key");
+ return InsertPair.first->second;
+}
+
/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -4498,9 +5077,9 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
/// Determine the range for a particular SCEV. If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
-ConstantRange
-ScalarEvolution::getRange(const SCEV *S,
- ScalarEvolution::RangeSignHint SignHint) {
+const ConstantRange &
+ScalarEvolution::getRangeRef(const SCEV *S,
+ ScalarEvolution::RangeSignHint SignHint) {
DenseMap<const SCEV *, ConstantRange> &Cache =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
: SignedRanges;
@@ -4531,54 +5110,54 @@ ScalarEvolution::getRange(const SCEV *S,
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getRange(Add->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getRange(Add->getOperand(i), SignHint));
+ X = X.add(getRangeRef(Add->getOperand(i), SignHint));
return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getRange(Mul->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+ X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getRange(SMax->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getRange(SMax->getOperand(i), SignHint));
+ X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getRange(UMax->getOperand(0), SignHint);
+ ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getRange(UMax->getOperand(i), SignHint));
+ X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getRange(UDiv->getLHS(), SignHint);
- ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+ ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
+ ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
return setRange(UDiv, SignHint,
ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+ ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
return setRange(ZExt, SignHint,
ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getRange(SExt->getOperand(), SignHint);
+ ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
return setRange(SExt, SignHint,
ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+ ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
@@ -4632,7 +5211,7 @@ ScalarEvolution::getRange(const SCEV *S,
}
}
- return setRange(AddRec, SignHint, ConservativeResult);
+ return setRange(AddRec, SignHint, std::move(ConservativeResult));
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -4647,11 +5226,11 @@ ScalarEvolution::getRange(const SCEV *S,
const DataLayout &DL = getDataLayout();
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
// For a SCEVUnknown, ask ValueTracking.
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
- if (Ones != ~Zeros + 1)
+ KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
+ if (Known.One != ~Known.Zero + 1)
ConservativeResult =
- ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ ConservativeResult.intersectWith(ConstantRange(Known.One,
+ ~Known.Zero + 1));
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
@@ -4662,10 +5241,78 @@ ScalarEvolution::getRange(const SCEV *S,
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
}
- return setRange(U, SignHint, ConservativeResult);
+ return setRange(U, SignHint, std::move(ConservativeResult));
}
- return setRange(S, SignHint, ConservativeResult);
+ return setRange(S, SignHint, std::move(ConservativeResult));
+}
+
+// Given a StartRange, Step and MaxBECount for an expression compute a range of
+// values that the expression can take. Initially, the expression has a value
+// from StartRange and then is changed by Step up to MaxBECount times. Signed
+// argument defines if we treat Step as signed or unsigned.
+static ConstantRange getRangeForAffineARHelper(APInt Step,
+ const ConstantRange &StartRange,
+ const APInt &MaxBECount,
+ unsigned BitWidth, bool Signed) {
+ // If either Step or MaxBECount is 0, then the expression won't change, and we
+ // just need to return the initial range.
+ if (Step == 0 || MaxBECount == 0)
+ return StartRange;
+
+ // If we don't know anything about the initial value (i.e. StartRange is
+ // FullRange), then we don't know anything about the final range either.
+ // Return FullRange.
+ if (StartRange.isFullSet())
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ // If Step is signed and negative, then we use its absolute value, but we also
+ // note that we're moving in the opposite direction.
+ bool Descending = Signed && Step.isNegative();
+
+ if (Signed)
+ // This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
+ // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
+ // This equations hold true due to the well-defined wrap-around behavior of
+ // APInt.
+ Step = Step.abs();
+
+ // Check if Offset is more than full span of BitWidth. If it is, the
+ // expression is guaranteed to overflow.
+ if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ // Offset is by how much the expression can change. Checks above guarantee no
+ // overflow here.
+ APInt Offset = Step * MaxBECount;
+
+ // Minimum value of the final range will match the minimal value of StartRange
+ // if the expression is increasing and will be decreased by Offset otherwise.
+ // Maximum value of the final range will match the maximal value of StartRange
+ // if the expression is decreasing and will be increased by Offset otherwise.
+ APInt StartLower = StartRange.getLower();
+ APInt StartUpper = StartRange.getUpper() - 1;
+ APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
+ : (StartUpper + std::move(Offset));
+
+ // It's possible that the new minimum/maximum value will fall into the initial
+ // range (due to wrap around). This means that the expression can take any
+ // value in this bitwidth, and we have to return full range.
+ if (StartRange.contains(MovedBoundary))
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ APInt NewLower =
+ Descending ? std::move(MovedBoundary) : std::move(StartLower);
+ APInt NewUpper =
+ Descending ? std::move(StartUpper) : std::move(MovedBoundary);
+ NewUpper += 1;
+
+ // If we end up with full range, return a proper full range.
+ if (NewLower == NewUpper)
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
+ return ConstantRange(std::move(NewLower), std::move(NewUpper));
}
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
@@ -4676,60 +5323,29 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
"Precondition!");
- ConstantRange Result(BitWidth, /* isFullSet = */ true);
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
-
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2);
+ APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);
+ // First, consider step signed.
+ ConstantRange StartSRange = getSignedRange(Start);
ConstantRange StepSRange = getSignedRange(Step);
- ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2);
-
- ConstantRange StartURange = getUnsignedRange(Start);
- ConstantRange EndURange =
- StartURange.add(MaxBECountRange.multiply(StepSRange));
-
- // Check for unsigned overflow.
- ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2);
- ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2);
- if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
- ZExtEndURange) {
- APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
- EndURange.getUnsignedMin());
- APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
- EndURange.getUnsignedMax());
- bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
- if (!IsFullRange)
- Result =
- Result.intersectWith(ConstantRange(Min, Max + 1));
- }
- ConstantRange StartSRange = getSignedRange(Start);
- ConstantRange EndSRange =
- StartSRange.add(MaxBECountRange.multiply(StepSRange));
-
- // Check for signed overflow. This must be done with ConstantRange
- // arithmetic because we could be called from within the ScalarEvolution
- // overflow checking code.
- ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2);
- ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2);
- if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
- SExtEndSRange) {
- APInt Min =
- APIntOps::smin(StartSRange.getSignedMin(), EndSRange.getSignedMin());
- APInt Max =
- APIntOps::smax(StartSRange.getSignedMax(), EndSRange.getSignedMax());
- bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
- if (!IsFullRange)
- Result =
- Result.intersectWith(ConstantRange(Min, Max + 1));
- }
+ // If Step can be both positive and negative, we need to find ranges for the
+ // maximum absolute step values in both directions and union them.
+ ConstantRange SR =
+ getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
+ MaxBECountValue, BitWidth, /* Signed = */ true);
+ SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
+ StartSRange, MaxBECountValue,
+ BitWidth, /* Signed = */ true));
- return Result;
+ // Next, consider step unsigned.
+ ConstantRange UR = getRangeForAffineARHelper(
+ getUnsignedRangeMax(Step), getUnsignedRange(Start),
+ MaxBECountValue, BitWidth, /* Signed = */ false);
+
+ // Finally, intersect signed and unsigned ranges.
+ return SR.intersectWith(UR);
}
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
@@ -4875,7 +5491,8 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
return false;
// Only proceed if we can prove that I does not yield poison.
- if (!isKnownNotFullPoison(I)) return false;
+ if (!programUndefinedIfFullPoison(I))
+ return false;
// At this point we know that if I is executed, then it does not wrap
// according to at least one of NSW or NUW. If I is not executed, then we do
@@ -5128,9 +5745,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
- if (CI->isNullValue())
+ if (CI->isZero())
return getSCEV(BO->RHS);
- if (CI->isAllOnesValue())
+ if (CI->isMinusOne())
return getSCEV(BO->LHS);
const APInt &A = CI->getValue();
@@ -5141,19 +5758,34 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned LZ = A.countLeadingZeros();
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(),
+ KnownBits Known(BitWidth);
+ computeKnownBits(BO->LHS, Known, getDataLayout(),
0, &AC, nullptr, &DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
- if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
- const SCEV *MulCount = getConstant(ConstantInt::get(
- getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+ if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) {
+ const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ));
+ const SCEV *LHS = getSCEV(BO->LHS);
+ const SCEV *ShiftedLHS = nullptr;
+ if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
+ if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
+ // For an expression like (x * 8) & 8, simplify the multiply.
+ unsigned MulZeros = OpC->getAPInt().countTrailingZeros();
+ unsigned GCD = std::min(MulZeros, TZ);
+ APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
+ SmallVector<const SCEV*, 4> MulOps;
+ MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD)));
+ MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end());
+ auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags());
+ ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt));
+ }
+ }
+ if (!ShiftedLHS)
+ ShiftedLHS = getUDivExpr(LHS, MulCount);
return getMulExpr(
getZeroExtendExpr(
- getTruncateExpr(
- getUDivExactExpr(getSCEV(BO->LHS), MulCount),
+ getTruncateExpr(ShiftedLHS,
IntegerType::get(getContext(), BitWidth - LZ - TZ)),
BO->LHS->getType()),
MulCount);
@@ -5190,7 +5822,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case Instruction::Xor:
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
// If the RHS of xor is -1, then this is a not operation.
- if (CI->isAllOnesValue())
+ if (CI->isMinusOne())
return getNotSCEV(getSCEV(BO->LHS));
// Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
@@ -5211,7 +5843,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
- if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ if (CI->getValue().isMask(Z0TySize))
return getZeroExtendExpr(getNotSCEV(Z0), UTy);
// If C is a single bit, it may be in the sign-bit position
@@ -5219,7 +5851,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// using an add, which is equivalent, and re-apply the zext.
APInt Trunc = CI->getValue().trunc(Z0TySize);
if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
- Trunc.isSignBit())
+ Trunc.isSignMask())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
}
@@ -5255,28 +5887,55 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case Instruction::AShr:
- // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS))
- if (Operator *L = dyn_cast<Operator>(BO->LHS))
- if (L->getOpcode() == Instruction::Shl &&
- L->getOperand(1) == BO->RHS) {
- uint64_t BitWidth = getTypeSizeInBits(BO->LHS->getType());
-
- // If the shift count is not less than the bitwidth, the result of
- // the shift is undefined. Don't try to analyze it, because the
- // resolution chosen here may differ from the resolution chosen in
- // other parts of the compiler.
- if (CI->getValue().uge(BitWidth))
- break;
+ // AShr X, C, where C is a constant.
+ ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
+ if (!CI)
+ break;
+
+ Type *OuterTy = BO->LHS->getType();
+ uint64_t BitWidth = getTypeSizeInBits(OuterTy);
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
- uint64_t Amt = BitWidth - CI->getZExtValue();
- if (Amt == BitWidth)
- return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ if (CI->isZero())
+ return getSCEV(BO->LHS); // shift by zero --> noop
+
+ uint64_t AShrAmt = CI->getZExtValue();
+ Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt);
+
+ Operator *L = dyn_cast<Operator>(BO->LHS);
+ if (L && L->getOpcode() == Instruction::Shl) {
+ // X = Shl A, n
+ // Y = AShr X, m
+ // Both n and m are constant.
+
+ const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0));
+ if (L->getOperand(1) == BO->RHS)
+ // For a two-shift sext-inreg, i.e. n = m,
+ // use sext(trunc(x)) as the SCEV expression.
+ return getSignExtendExpr(
+ getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy);
+
+ ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
+ if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) {
+ uint64_t ShlAmt = ShlAmtCI->getZExtValue();
+ if (ShlAmt > AShrAmt) {
+ // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV
+ // expression. We already checked that ShlAmt < BitWidth, so
+ // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as
+ // ShlAmt - AShrAmt < Amt.
+ APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
+ ShlAmt - AShrAmt);
return getSignExtendExpr(
- getTruncateExpr(getSCEV(L->getOperand(0)),
- IntegerType::get(getContext(), Amt)),
- BO->LHS->getType());
+ getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy),
+ getConstant(Mul)), OuterTy);
}
+ }
+ }
break;
}
}
@@ -5348,7 +6007,7 @@ static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
+unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripCount(L, ExitingBB);
@@ -5356,7 +6015,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
return 0;
}
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L,
BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
@@ -5366,13 +6025,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
return getConstantTripCount(ExitCount);
}
-unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
}
-unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
+unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripMultiple(L, ExitingBB);
@@ -5393,7 +6052,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned
-ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
@@ -5403,17 +6062,16 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
return 1;
// Get the trip count from the BE count by adding 1.
- const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
- // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
- // to factor simple cases.
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
- TCMul = Mul->getOperand(0);
-
- const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
- if (!MulC)
- return 1;
+ const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType()));
+
+ const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
+ if (!TC)
+ // Attempt to factor more general cases. Returns the greatest power of
+ // two divisor. If overflow happens, the trip count expression is still
+ // divisible by the greatest power of 2 divisor returned.
+ return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr));
- ConstantInt *Result = MulC->getValue();
+ ConstantInt *Result = TC->getValue();
// Guard against huge trip counts (this requires checking
// for zero to handle the case where the trip count == -1 and the
@@ -5428,7 +6086,8 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
/// Get the expression for the number of loop iterations for which this loop is
/// guaranteed not to exit via ExitingBlock. Otherwise return
/// SCEVCouldNotCompute.
-const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+const SCEV *ScalarEvolution::getExitCount(const Loop *L,
+ BasicBlock *ExitingBlock) {
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
}
@@ -5569,6 +6228,16 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
+ // Drop information about predicated SCEV rewrites for this loop.
+ for (auto I = PredicatedSCEVRewrites.begin();
+ I != PredicatedSCEVRewrites.end();) {
+ std::pair<const SCEV *, const Loop *> Entry = I->first;
+ if (Entry.second == L)
+ PredicatedSCEVRewrites.erase(I++);
+ else
+ ++I;
+ }
+
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
@@ -5681,6 +6350,8 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
return SE->getCouldNotCompute();
+ assert((isa<SCEVCouldNotCompute>(getMax()) || isa<SCEVConstant>(getMax())) &&
+ "No point in having a non-constant max backedge taken count!");
return getMax();
}
@@ -5705,6 +6376,45 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
return false;
}
+ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
+ : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+}
+
+ScalarEvolution::ExitLimit::ExitLimit(
+ const SCEV *E, const SCEV *M, bool MaxOrZero,
+ ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
+ : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
+ assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
+ !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
+ "Exact is not allowed to be less precise than Max");
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+ for (auto *PredSet : PredSetList)
+ for (auto *P : *PredSet)
+ addPredicate(P);
+}
+
+ScalarEvolution::ExitLimit::ExitLimit(
+ const SCEV *E, const SCEV *M, bool MaxOrZero,
+ const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
+ : ExitLimit(E, M, MaxOrZero, {&PredSet}) {
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+}
+
+ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
+ bool MaxOrZero)
+ : ExitLimit(E, M, MaxOrZero, None) {
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+}
+
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -5728,6 +6438,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
});
+ assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) &&
+ "No point in having a non-constant max backedge taken count!");
}
/// Invalidate this result and free the ExitNotTakenInfo array.
@@ -5886,24 +6598,74 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
return getCouldNotCompute();
}
-ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimitFromCond(const Loop *L,
- Value *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB,
- bool ControlsExit,
- bool AllowPredicates) {
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
+ const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit, bool AllowPredicates) {
+ ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates);
+ return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB,
+ ControlsExit, AllowPredicates);
+}
+
+Optional<ScalarEvolution::ExitLimit>
+ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
+ BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit, bool AllowPredicates) {
+ (void)this->L;
+ (void)this->TBB;
+ (void)this->FBB;
+ (void)this->AllowPredicates;
+
+ assert(this->L == L && this->TBB == TBB && this->FBB == FBB &&
+ this->AllowPredicates == AllowPredicates &&
+ "Variance in assumed invariant key components!");
+ auto Itr = TripCountMap.find({ExitCond, ControlsExit});
+ if (Itr == TripCountMap.end())
+ return None;
+ return Itr->second;
+}
+
+void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
+ BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit,
+ bool AllowPredicates,
+ const ExitLimit &EL) {
+ assert(this->L == L && this->TBB == TBB && this->FBB == FBB &&
+ this->AllowPredicates == AllowPredicates &&
+ "Variance in assumed invariant key components!");
+
+ auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
+ assert(InsertResult.second && "Expected successful insertion!");
+ (void)InsertResult;
+}
+
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB,
+ BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) {
+
+ if (auto MaybeEL =
+ Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates))
+ return *MaybeEL;
+
+ ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB,
+ ControlsExit, AllowPredicates);
+ Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL);
+ return EL;
+}
+
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB,
+ BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
- ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -5939,7 +6701,7 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
// to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
- MaxBECount = BECount;
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, false,
{&EL0.Predicates, &EL1.Predicates});
@@ -5947,12 +6709,12 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
- ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -6337,13 +7099,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
// {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
// bitwidth(K) iterations.
Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
- bool KnownZero, KnownOne;
- ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
- Predecessor->getTerminator(), &DT);
+ KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr,
+ Predecessor->getTerminator(), &DT);
auto *Ty = cast<IntegerType>(RHS->getType());
- if (KnownZero)
+ if (Known.isNonNegative())
StableValue = ConstantInt::get(Ty, 0);
- else if (KnownOne)
+ else if (Known.isNegative())
StableValue = ConstantInt::get(Ty, -1, true);
else
return getCouldNotCompute();
@@ -6383,7 +7144,7 @@ static bool CanConstantFold(const Instruction *I) {
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction())
- return canConstantFoldCallTo(F);
+ return canConstantFoldCallTo(CI, F);
return false;
}
@@ -6408,7 +7169,10 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) {
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
- DenseMap<Instruction *, PHINode *> &PHIMap) {
+ DenseMap<Instruction *, PHINode *> &PHIMap,
+ unsigned Depth) {
+ if (Depth > MaxConstantEvolvingDepth)
+ return nullptr;
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
@@ -6428,7 +7192,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
if (!P) {
// Recurse and memoize the results, whether a phi is found or not.
// This recursive call invalidates pointers into PHIMap.
- P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
PHIMap[OpInst] = P;
}
if (!P)
@@ -6455,7 +7219,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
- return getConstantEvolvingPHIOperands(I, L, PHIMap);
+ return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}
/// EvaluateExpression - Given an expression that passes the
@@ -6829,6 +7593,25 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
if (const SCEVConstant *BTCC =
dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+
+ // This trivial case can show up in some degenerate cases where
+ // the incoming IR has not yet been fully simplified.
+ if (BTCC->getValue()->isZero()) {
+ Value *InitValue = nullptr;
+ bool MultipleInitValues = false;
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (!LI->contains(PN->getIncomingBlock(i))) {
+ if (!InitValue)
+ InitValue = PN->getIncomingValue(i);
+ else if (InitValue != PN->getIncomingValue(i)) {
+ MultipleInitValues = true;
+ break;
+ }
+ }
+ if (!MultipleInitValues && InitValue)
+ return getSCEV(InitValue);
+ }
+ }
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
@@ -7014,10 +7797,10 @@ const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
-static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
- assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(BW == SE.getTypeSizeInBits(B->getType()));
assert(A != 0 && "A must be non-zero.");
// 1. D = gcd(A, N)
@@ -7031,7 +7814,7 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
//
// B is divisible by D if and only if the multiplicity of prime factor 2 for B
// is not less than multiplicity of this prime factor for D.
- if (B.countTrailingZeros() < Mult2)
+ if (SE.GetMinTrailingZeros(B) < Mult2)
return SE.getCouldNotCompute();
// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
@@ -7049,9 +7832,8 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
// I * (B / D) mod (N / D)
// To simplify the computation, we factor out the divide by D:
// (I * B mod N) / D
- APInt Result = (I * B).lshr(Mult2);
-
- return SE.getConstant(Result);
+ const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
+ return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
/// Find the roots of the quadratic equation for the given quadratic chrec
@@ -7074,50 +7856,50 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
const APInt &M = MC->getAPInt();
const APInt &N = NC->getAPInt();
APInt Two(BitWidth, 2);
- APInt Four(BitWidth, 4);
-
- {
- using namespace APIntOps;
- const APInt& C = L;
- // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
- // The B coefficient is M-N/2
- APInt B(M);
- B -= sdiv(N,Two);
-
- // The A coefficient is N/2
- APInt A(N.sdiv(Two));
-
- // Compute the B^2-4ac term.
- APInt SqrtTerm(B);
- SqrtTerm *= B;
- SqrtTerm -= Four * (A * C);
-
- if (SqrtTerm.isNegative()) {
- // The loop is provably infinite.
- return None;
- }
- // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
- // integer value or else APInt::sqrt() will assert.
- APInt SqrtVal(SqrtTerm.sqrt());
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
- // Compute the two solutions for the quadratic formula.
- // The divisions must be performed as signed divisions.
- APInt NegB(-B);
- APInt TwoA(A << 1);
- if (TwoA.isMinValue())
- return None;
+ // The A coefficient is N/2
+ APInt A = N.sdiv(Two);
+
+ // The B coefficient is M-N/2
+ APInt B = M;
+ B -= A; // A is the same as N/2.
- LLVMContext &Context = SE.getContext();
+ // The C coefficient is L.
+ const APInt& C = L;
- ConstantInt *Solution1 =
- ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 =
- ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm = B;
+ SqrtTerm *= B;
+ SqrtTerm -= 4 * (A * C);
- return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
- cast<SCEVConstant>(SE.getConstant(Solution2)));
- } // end APIntOps namespace
+ if (SqrtTerm.isNegative()) {
+ // The loop is provably infinite.
+ return None;
+ }
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal = SqrtTerm.sqrt();
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB = -std::move(B);
+ APInt TwoA = std::move(A);
+ TwoA <<= 1;
+ if (TwoA.isNullValue())
+ return None;
+
+ LLVMContext &Context = SE.getContext();
+
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+
+ return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
+ cast<SCEVConstant>(SE.getConstant(Solution2)));
}
ScalarEvolution::ExitLimit
@@ -7197,7 +7979,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
- if (!StepC || StepC->getValue()->equalsInt(0))
+ if (!StepC || StepC->getValue()->isZero())
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
@@ -7211,8 +7993,8 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Handle unitary steps, which cannot wraparound.
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
- if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
- APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax();
+ if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
+ APInt MaxBECount = getUnsignedRangeMax(Distance);
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
@@ -7233,62 +8015,6 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
}
- // As a special case, handle the instance where Step is a positive power of
- // two. In this case, determining whether Step divides Distance evenly can be
- // done by counting and comparing the number of trailing zeros of Step and
- // Distance.
- if (!CountDown) {
- const APInt &StepV = StepC->getAPInt();
- // StepV.isPowerOf2() returns true if StepV is an positive power of two. It
- // also returns true if StepV is maximally negative (eg, INT_MIN), but that
- // case is not handled as this code is guarded by !CountDown.
- if (StepV.isPowerOf2() &&
- GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) {
- // Here we've constrained the equation to be of the form
- //
- // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0)
- //
- // where we're operating on a W bit wide integer domain and k is
- // non-negative. The smallest unsigned solution for X is the trip count.
- //
- // (0) is equivalent to:
- //
- // 2^(N + k) * Distance' - 2^N * X = L * 2^W
- // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N
- // <=> 2^k * Distance' - X = L * 2^(W - N)
- // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1)
- //
- // The smallest X satisfying (1) is unsigned remainder of dividing the LHS
- // by 2^(W - N).
- //
- // <=> X = 2^k * Distance' URem 2^(W - N) ... (2)
- //
- // E.g. say we're solving
- //
- // 2 * Val = 2 * X (in i8) ... (3)
- //
- // then from (2), we get X = Val URem i8 128 (k = 0 in this case).
- //
- // Note: It is tempting to solve (3) by setting X = Val, but Val is not
- // necessarily the smallest unsigned value of X that satisfies (3).
- // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3)
- // is i8 1, not i8 -127
-
- const auto *ModuloResult = getUDivExactExpr(Distance, Step);
-
- // Since SCEV does not have a URem node, we construct one using a truncate
- // and a zero extend.
-
- unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros();
- auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
- auto *WideTy = Distance->getType();
-
- const SCEV *Limit =
- getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
- return ExitLimit(Limit, Limit, false, Predicates);
- }
- }
-
// If the condition controls loop exit (the loop exits only if the expression
// is true) and the addition is no-wrap we can use unsigned divide to
// compute the backedge count. In this case, the step may not divide the
@@ -7298,16 +8024,20 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, false, Predicates);
+ const SCEV *Max =
+ Exact == getCouldNotCompute()
+ ? Exact
+ : getConstant(getUnsignedRangeMax(Exact));
+ return ExitLimit(Exact, Max, false, Predicates);
}
- // Then, try to solve the above equation provided that Start is constant.
- if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
- const SCEV *E = SolveLinEquationWithOverflow(
- StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
- return ExitLimit(E, E, false, Predicates);
- }
- return getCouldNotCompute();
+ // Solve the general equation.
+ const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
+ getNegativeSCEV(Start), *this);
+ const SCEV *M = E == getCouldNotCompute()
+ ? E
+ : getConstant(getUnsignedRangeMax(E));
+ return ExitLimit(E, M, false, Predicates);
}
ScalarEvolution::ExitLimit
@@ -7319,7 +8049,7 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
// If the value is a constant, check to see if it is known to be non-zero
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
- if (!C->getValue()->isNullValue())
+ if (!C->getValue()->isZero())
return getZero(C->getType());
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
@@ -7503,12 +8233,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// adding or subtracting 1 from one of the operands.
switch (Pred) {
case ICmpInst::ICMP_SLE:
- if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
+ if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
- } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
+ } else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
@@ -7516,12 +8246,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_SGE:
- if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
+ if (!getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
- } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
+ } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
@@ -7529,23 +8259,23 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_ULE:
- if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
+ if (!getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
- } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
+ } else if (!getUnsignedRangeMin(LHS).isMinValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
- if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
+ if (!getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
- } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
+ } else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_UGT;
@@ -7579,19 +8309,19 @@ trivially_false:
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
- return getSignedRange(S).getSignedMax().isNegative();
+ return getSignedRangeMax(S).isNegative();
}
bool ScalarEvolution::isKnownPositive(const SCEV *S) {
- return getSignedRange(S).getSignedMin().isStrictlyPositive();
+ return getSignedRangeMin(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
- return !getSignedRange(S).getSignedMin().isNegative();
+ return !getSignedRangeMin(S).isNegative();
}
bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
- return !getSignedRange(S).getSignedMax().isStrictlyPositive();
+ return !getSignedRangeMax(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
@@ -7822,6 +8552,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
// X s<= (X + C)<nsw> if C >= 0
if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
@@ -7835,6 +8566,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
case ICmpInst::ICMP_SGT:
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
// X s< (X + C)<nsw> if C > 0
if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
@@ -8175,7 +8907,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
// predicate we're interested in folding.
APInt Min = ICmpInst::isSigned(Pred) ?
- getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
+ getSignedRangeMin(V) : getUnsignedRangeMin(V);
if (Min == C->getAPInt()) {
// Given (V >= Min && V != Min) we conclude V >= (Min + 1).
@@ -8192,6 +8924,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
if (isImpliedCondOperands(Pred, LHS, RHS, V,
getConstant(SharperMin)))
return true;
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT:
@@ -8206,6 +8939,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
return true;
+ LLVM_FALLTHROUGH;
default:
// No change
@@ -8488,19 +9222,161 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
llvm_unreachable("covered switch fell through?!");
}
+bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS,
+ unsigned Depth) {
+ assert(getTypeSizeInBits(LHS->getType()) ==
+ getTypeSizeInBits(RHS->getType()) &&
+ "LHS and RHS have different sizes?");
+ assert(getTypeSizeInBits(FoundLHS->getType()) ==
+ getTypeSizeInBits(FoundRHS->getType()) &&
+ "FoundLHS and FoundRHS have different sizes?");
+ // We want to avoid hurting the compile time with analysis of too big trees.
+ if (Depth > MaxSCEVOperationsImplicationDepth)
+ return false;
+ // We only want to work with ICMP_SGT comparison so far.
+ // TODO: Extend to ICMP_UGT?
+ if (Pred == ICmpInst::ICMP_SLT) {
+ Pred = ICmpInst::ICMP_SGT;
+ std::swap(LHS, RHS);
+ std::swap(FoundLHS, FoundRHS);
+ }
+ if (Pred != ICmpInst::ICMP_SGT)
+ return false;
+
+ auto GetOpFromSExt = [&](const SCEV *S) {
+ if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
+ return Ext->getOperand();
+ // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
+ // the constant in some cases.
+ return S;
+ };
+
+ // Acquire values from extensions.
+ auto *OrigFoundLHS = FoundLHS;
+ LHS = GetOpFromSExt(LHS);
+ FoundLHS = GetOpFromSExt(FoundLHS);
+
+ // Is the SGT predicate can be proved trivially or using the found context.
+ auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
+ return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
+ isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
+ FoundRHS, Depth + 1);
+ };
+
+ if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
+ // We want to avoid creation of any new non-constant SCEV. Since we are
+ // going to compare the operands to RHS, we should be certain that we don't
+ // need any size extensions for this. So let's decline all cases when the
+ // sizes of types of LHS and RHS do not match.
+ // TODO: Maybe try to get RHS from sext to catch more cases?
+ if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
+ return false;
+
+ // Should not overflow.
+ if (!LHSAddExpr->hasNoSignedWrap())
+ return false;
+
+ auto *LL = LHSAddExpr->getOperand(0);
+ auto *LR = LHSAddExpr->getOperand(1);
+ auto *MinusOne = getNegativeSCEV(getOne(RHS->getType()));
+
+ // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
+ auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
+ return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
+ };
+ // Try to prove the following rule:
+ // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
+ // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
+ if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
+ return true;
+ } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
+ Value *LL, *LR;
+ // FIXME: Once we have SDiv implemented, we can get rid of this matching.
+ using namespace llvm::PatternMatch;
+ if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
+ // Rules for division.
+ // We are going to perform some comparisons with Denominator and its
+ // derivative expressions. In general case, creating a SCEV for it may
+ // lead to a complex analysis of the entire graph, and in particular it
+ // can request trip count recalculation for the same loop. This would
+ // cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid
+ // this, we only want to create SCEVs that are constants in this section.
+ // So we bail if Denominator is not a constant.
+ if (!isa<ConstantInt>(LR))
+ return false;
+
+ auto *Denominator = cast<SCEVConstant>(getSCEV(LR));
+
+ // We want to make sure that LHS = FoundLHS / Denominator. If it is so,
+ // then a SCEV for the numerator already exists and matches with FoundLHS.
+ auto *Numerator = getExistingSCEV(LL);
+ if (!Numerator || Numerator->getType() != FoundLHS->getType())
+ return false;
+
+ // Make sure that the numerator matches with FoundLHS and the denominator
+ // is positive.
+ if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
+ return false;
+
+ auto *DTy = Denominator->getType();
+ auto *FRHSTy = FoundRHS->getType();
+ if (DTy->isPointerTy() != FRHSTy->isPointerTy())
+ // One of types is a pointer and another one is not. We cannot extend
+ // them properly to a wider type, so let us just reject this case.
+ // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
+ // to avoid this check.
+ return false;
+
+ // Given that:
+ // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
+ auto *WTy = getWiderType(DTy, FRHSTy);
+ auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
+ auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);
+
+ // Try to prove the following rule:
+ // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
+ // For example, given that FoundLHS > 2. It means that FoundLHS is at
+ // least 3. If we divide it by Denominator < 4, we will have at least 1.
+ auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
+ if (isKnownNonPositive(RHS) &&
+ IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
+ return true;
+
+ // Try to prove the following rule:
+ // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
+ // For example, given that FoundLHS > -3. Then FoundLHS is at least -2.
+ // If we divide it by Denominator > 2, then:
+ // 1. If FoundLHS is negative, then the result is 0.
+ // 2. If FoundLHS is non-negative, then the result is non-negative.
+ // Anyways, the result is non-negative.
+ auto *MinusOne = getNegativeSCEV(getOne(WTy));
+ auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
+ if (isKnownNegative(RHS) &&
+ IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool
+ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+ IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+ isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
+}
+
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
- auto IsKnownPredicateFull =
- [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
- return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
- IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
- IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
- isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
- };
-
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
@@ -8510,30 +9386,34 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
+ // Maybe it can be proved via operations?
+ if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return false;
}
@@ -8551,7 +9431,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
if (!Addend)
return false;
- APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
+ const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
// antecedent "`FoundLHS` `Pred` `FoundRHS`".
@@ -8563,7 +9443,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
- APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
+ const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
ConstantRange SatisfyingLHSRange =
ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
@@ -8582,22 +9462,20 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
- APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxRHS = getSignedRangeMax(RHS);
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
- APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
- .getSignedMax();
+ APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
- return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+ return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
}
- APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxRHS = getUnsignedRangeMax(RHS);
APInt MaxValue = APInt::getMaxValue(BitWidth);
- APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
- .getUnsignedMax();
+ APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
- return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+ return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
}
bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
@@ -8608,22 +9486,20 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
- APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinRHS = getSignedRangeMin(RHS);
APInt MinValue = APInt::getSignedMinValue(BitWidth);
- APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
- .getSignedMax();
+ APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
- return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
+ return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
}
- APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinRHS = getUnsignedRangeMin(RHS);
APInt MinValue = APInt::getMinValue(BitWidth);
- APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
- .getUnsignedMax();
+ APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
- return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+ return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
}
const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
@@ -8759,8 +9635,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
} else {
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
- APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
- : getUnsignedRange(Start).getUnsignedMin();
+ APInt MinStart = IsSigned ? getSignedRangeMin(Start)
+ : getUnsignedRangeMin(Start);
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
@@ -8768,8 +9644,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (PositiveStride)
StrideForMaxBECount =
- IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ IsSigned ? getSignedRangeMin(Stride)
+ : getUnsignedRangeMin(Stride);
else
// Using a stride of 1 is safe when computing max backedge taken count for
// a loop with unknown stride.
@@ -8783,15 +9659,16 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// the case End = RHS. This is safe because in the other case (End - Start)
// is zero, leading to a zero maximum backedge taken count.
APInt MaxEnd =
- IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
- : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+ IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit)
+ : APIntOps::umin(getUnsignedRangeMax(RHS), Limit);
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
getConstant(StrideForMaxBECount), false);
}
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
+ if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+ !isa<SCEVCouldNotCompute>(BECount))
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
@@ -8842,11 +9719,11 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
- APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
- : getUnsignedRange(Start).getUnsignedMax();
+ APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
+ : getUnsignedRangeMax(Start);
- APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ APInt MinStride = IsSigned ? getSignedRangeMin(Stride)
+ : getUnsignedRangeMin(Stride);
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
@@ -8856,8 +9733,8 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// the case End = RHS. This is safe because in the other case (Start - End)
// is zero, leading to a zero maximum backedge taken count.
APInt MinEnd =
- IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
- : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+ IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
+ : APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
const SCEV *MaxBECount = getCouldNotCompute();
@@ -8914,9 +9791,8 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
// the upper value of the range must be the first possible exit value.
// If A is negative then the lower of the range is the last possible loop
// value. Also note that we already checked for a full range.
- APInt One(BitWidth,1);
APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
- APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+ APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower();
// The exit value should be (End+A)/A.
APInt ExitVal = (End + A).udiv(A);
@@ -8932,7 +9808,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
// Ensure that the previous value is in the range. This is a sanity check.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
- ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
+ ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
} else if (isQuadratic()) {
@@ -9083,8 +9959,11 @@ struct SCEVCollectAddRecMultiplies {
bool HasAddRec = false;
SmallVector<const SCEV *, 0> Operands;
for (auto Op : Mul->operands()) {
- if (isa<SCEVUnknown>(Op)) {
+ const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
+ if (Unknown && !isa<CallInst>(Unknown->getValue())) {
Operands.push_back(Op);
+ } else if (Unknown) {
+ HasAddRec = true;
} else {
bool ContainsAddRec;
SCEVHasAddRec ContiansAddRec(ContainsAddRec);
@@ -9238,7 +10117,7 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize) const {
+ const SCEV *ElementSize) {
if (Terms.size() < 1 || !ElementSize)
return;
@@ -9254,7 +10133,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
});
// Remove duplicates.
- std::sort(Terms.begin(), Terms.end());
+ array_pod_sort(Terms.begin(), Terms.end());
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
@@ -9262,13 +10141,11 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
- ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
-
// Try to divide all terms by the element size. If term is not divisible by
// element size, proceed with the original term.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
- SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ SCEVDivision::divide(*this, Term, ElementSize, &Q, &R);
if (!Q->isZero())
Term = Q;
}
@@ -9277,7 +10154,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
// Remove constant factors.
for (const SCEV *T : Terms)
- if (const SCEV *NewT = removeConstantFactors(SE, T))
+ if (const SCEV *NewT = removeConstantFactors(*this, T))
NewTerms.push_back(NewT);
DEBUG({
@@ -9286,8 +10163,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
dbgs() << *T << "\n";
});
- if (NewTerms.empty() ||
- !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+ if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
Sizes.clear();
return;
}
@@ -9524,6 +10400,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
@@ -9538,6 +10415,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
UniquePreds(std::move(Arg.UniquePreds)),
SCEVAllocator(std::move(Arg.SCEVAllocator)),
+ PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
FirstUnknown(Arg.FirstUnknown) {
Arg.FirstUnknown = nullptr;
}
@@ -9621,6 +10499,13 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
OS << "Unpredictable predicated backedge-taken count. ";
}
OS << "\n";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+ OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
+ }
}
static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
@@ -9929,6 +10814,16 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
SignedRanges.erase(S);
ExprValueMap.erase(S);
HasRecMap.erase(S);
+ MinTrailingZerosCache.erase(S);
+
+ for (auto I = PredicatedSCEVRewrites.begin();
+ I != PredicatedSCEVRewrites.end();) {
+ std::pair<const SCEV *, const Loop *> Entry = I->first;
+ if (Entry.first == S)
+ PredicatedSCEVRewrites.erase(I++);
+ else
+ ++I;
+ }
auto RemoveSCEVFromBackedgeMap =
[S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
@@ -9946,84 +10841,75 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
-typedef DenseMap<const Loop *, std::string> VerifyMap;
+void ScalarEvolution::verify() const {
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+ ScalarEvolution SE2(F, TLI, AC, DT, LI);
-/// replaceSubString - Replaces all occurrences of From in Str with To.
-static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
- size_t Pos = 0;
- while ((Pos = Str.find(From, Pos)) != std::string::npos) {
- Str.replace(Pos, From.size(), To.data(), To.size());
- Pos += To.size();
- }
-}
+ SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end());
-/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
-static void
-getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
- std::string &S = Map[L];
- if (S.empty()) {
- raw_string_ostream OS(S);
- SE.getBackedgeTakenCount(L)->print(OS);
+ // Map's SCEV expressions from one ScalarEvolution "universe" to another.
+ struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ return SE.getConstant(Constant->getAPInt());
+ }
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ return SE.getUnknown(Expr->getValue());
+ }
- // false and 0 are semantically equivalent. This can happen in dead loops.
- replaceSubString(OS.str(), "false", "0");
- // Remove wrap flags, their use in SCEV is highly fragile.
- // FIXME: Remove this when SCEV gets smarter about them.
- replaceSubString(OS.str(), "<nw>", "");
- replaceSubString(OS.str(), "<nsw>", "");
- replaceSubString(OS.str(), "<nuw>", "");
- }
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return SE.getCouldNotCompute();
+ }
+ SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
+ };
- for (auto *R : reverse(*L))
- getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
-}
+ SCEVMapper SCM(SE2);
-void ScalarEvolution::verify() const {
- ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+ while (!LoopStack.empty()) {
+ auto *L = LoopStack.pop_back_val();
+ LoopStack.insert(LoopStack.end(), L->begin(), L->end());
- // Gather stringified backedge taken counts for all loops using SCEV's caches.
- // FIXME: It would be much better to store actual values instead of strings,
- // but SCEV pointers will change if we drop the caches.
- VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
- for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
- getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+ auto *CurBECount = SCM.visit(
+ const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
+ auto *NewBECount = SE2.getBackedgeTakenCount(L);
- // Gather stringified backedge taken counts for all loops using a fresh
- // ScalarEvolution object.
- ScalarEvolution SE2(F, TLI, AC, DT, LI);
- for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
- getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
-
- // Now compare whether they're the same with and without caches. This allows
- // verifying that no pass changed the cache.
- assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
- "New loops suddenly appeared!");
-
- for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
- OldE = BackedgeDumpsOld.end(),
- NewI = BackedgeDumpsNew.begin();
- OldI != OldE; ++OldI, ++NewI) {
- assert(OldI->first == NewI->first && "Loop order changed!");
-
- // Compare the stringified SCEVs. We don't care if undef backedgetaken count
- // changes.
- // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
- // means that a pass is buggy or SCEV has to learn a new pattern but is
- // usually not harmful.
- if (OldI->second != NewI->second &&
- OldI->second.find("undef") == std::string::npos &&
- NewI->second.find("undef") == std::string::npos &&
- OldI->second != "***COULDNOTCOMPUTE***" &&
- NewI->second != "***COULDNOTCOMPUTE***") {
- dbgs() << "SCEVValidator: SCEV for loop '"
- << OldI->first->getHeader()->getName()
- << "' changed from '" << OldI->second
- << "' to '" << NewI->second << "'!\n";
+ if (CurBECount == SE2.getCouldNotCompute() ||
+ NewBECount == SE2.getCouldNotCompute()) {
+ // NB! This situation is legal, but is very suspicious -- whatever pass
+ // change the loop to make a trip count go from could not compute to
+ // computable or vice-versa *should have* invalidated SCEV. However, we
+ // choose not to assert here (for now) since we don't want false
+ // positives.
+ continue;
+ }
+
+ if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
+ // SCEV treats "undef" as an unknown but consistent value (i.e. it does
+ // not propagate undef aggressively). This means we can (and do) fail
+ // verification in cases where a transform makes the trip count of a loop
+ // go from "undef" to "undef+1" (say). The transform is fine, since in
+ // both cases the loop iterates "undef" times, but SCEV thinks we
+ // increased the trip count of the loop by 1 incorrectly.
+ continue;
+ }
+
+ if (SE.getTypeSizeInBits(CurBECount->getType()) >
+ SE.getTypeSizeInBits(NewBECount->getType()))
+ NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
+ else if (SE.getTypeSizeInBits(CurBECount->getType()) <
+ SE.getTypeSizeInBits(NewBECount->getType()))
+ CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
+
+ auto *ConstantDelta =
+ dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount));
+
+ if (ConstantDelta && ConstantDelta->getAPInt() != 0) {
+ dbgs() << "Trip Count Changed!\n";
+ dbgs() << "Old: " << *CurBECount << "\n";
+ dbgs() << "New: " << *NewBECount << "\n";
+ dbgs() << "Delta: " << *ConstantDelta << "\n";
std::abort();
}
}
-
- // TODO: Verify more things.
}
bool ScalarEvolution::invalidate(
@@ -10098,10 +10984,11 @@ void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
-const SCEVPredicate *
-ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
- const SCEVConstant *RHS) {
+const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
+ const SCEV *RHS) {
FoldingSetNodeID ID;
+ assert(LHS->getType() == RHS->getType() &&
+ "Type mismatch between LHS and RHS");
// Unique this node based on the arguments
ID.AddInteger(SCEVPredicate::P_Equal);
ID.AddPointer(LHS);
@@ -10164,8 +11051,7 @@ public:
if (IPred->getLHS() == Expr)
return IPred->getRHS();
}
-
- return Expr;
+ return convertToAddRecWithPreds(Expr);
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
@@ -10201,17 +11087,41 @@ public:
}
private:
- bool addOverflowAssumption(const SCEVAddRecExpr *AR,
- SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
- auto *A = SE.getWrapPredicate(AR, AddedFlags);
+ bool addOverflowAssumption(const SCEVPredicate *P) {
if (!NewPreds) {
// Check if we've already made this assumption.
- return Pred && Pred->implies(A);
+ return Pred && Pred->implies(P);
}
- NewPreds->insert(A);
+ NewPreds->insert(P);
return true;
}
+ bool addOverflowAssumption(const SCEVAddRecExpr *AR,
+ SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
+ auto *A = SE.getWrapPredicate(AR, AddedFlags);
+ return addOverflowAssumption(A);
+ }
+
+ // If \p Expr represents a PHINode, we try to see if it can be represented
+ // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
+ // to add this predicate as a runtime overflow check, we return the AddRec.
+ // If \p Expr does not meet these conditions (is not a PHI node, or we
+ // couldn't create an AddRec for it, or couldn't add the predicate), we just
+ // return \p Expr.
+ const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
+ if (!isa<PHINode>(Expr->getValue()))
+ return Expr;
+ Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
+ if (!PredicatedRewrite)
+ return Expr;
+ for (auto *P : PredicatedRewrite->second){
+ if (!addOverflowAssumption(P))
+ return Expr;
+ }
+ return PredicatedRewrite->first;
+ }
+
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
SCEVUnionPredicate *Pred;
const Loop *L;
@@ -10248,9 +11158,11 @@ SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
: FastID(ID), Kind(Kind) {}
SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
- const SCEVUnknown *LHS,
- const SCEVConstant *RHS)
- : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+ const SCEV *LHS, const SCEV *RHS)
+ : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
+ assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
+ assert(LHS != RHS && "LHS and RHS are the same SCEV");
+}
bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index d15a7db..47bdac0 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -748,18 +748,56 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
Value *Prod = nullptr;
- for (const auto &I : OpsAndLoops) {
- const SCEV *Op = I.second;
+ auto I = OpsAndLoops.begin();
+
+ // Expand the calculation of X pow N in the following manner:
+ // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
+ // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
+ const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
+ auto E = I;
+ // Calculate how many times the same operand from the same loop is included
+ // into this power.
+ uint64_t Exponent = 0;
+ const uint64_t MaxExponent = UINT64_MAX >> 1;
+ // No one sane will ever try to calculate such huge exponents, but if we
+ // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
+ // below when the power of 2 exceeds our Exponent, and we want it to be
+ // 1u << 31 at most to not deal with unsigned overflow.
+ while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
+ ++Exponent;
+ ++E;
+ }
+ assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
+
+ // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
+ // that are needed into the result.
+ Value *P = expandCodeFor(I->second, Ty);
+ Value *Result = nullptr;
+ if (Exponent & 1)
+ Result = P;
+ for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
+ P = InsertBinop(Instruction::Mul, P, P);
+ if (Exponent & BinExp)
+ Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
+ }
+
+ I = E;
+ assert(Result && "Nothing was expanded?");
+ return Result;
+ };
+
+ while (I != OpsAndLoops.end()) {
if (!Prod) {
// This is the first operand. Just expand it.
- Prod = expand(Op);
- } else if (Op->isAllOnesValue()) {
+ Prod = ExpandOpBinPowN();
+ } else if (I->second->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
Prod = InsertNoopCastOfTo(Prod, Ty);
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
} else {
// A simple mul.
- Value *W = expandCodeFor(Op, Ty);
+ Value *W = ExpandOpBinPowN();
Prod = InsertNoopCastOfTo(Prod, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Prod)) std::swap(Prod, W);
@@ -1268,8 +1306,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (PostIncLoops.count(L)) {
PostIncLoopSet Loops;
Loops.insert(L);
- Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse(
- Normalize, S, nullptr, nullptr, Loops, SE, SE.DT));
+ Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
}
// Strip off any non-loop-dominating component from the addrec start.
@@ -1306,12 +1343,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Expand the core addrec. If we need post-loop scaling, force it to
// expand to an integer type to avoid the need for additional casting.
Type *ExpandTy = PostLoopScale ? IntTy : STy;
+ // We can't use a pointer type for the addrec if the pointer type is
+ // non-integral.
+ Type *AddRecPHIExpandTy =
+ DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;
+
// In some cases, we decide to reuse an existing phi node but need to truncate
// it and/or invert the step.
Type *TruncTy = nullptr;
bool InvertStep = false;
- PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
- TruncTy, InvertStep);
+ PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
+ IntTy, TruncTy, InvertStep);
// Accommodate post-inc mode, if necessary.
Value *Result;
@@ -1384,8 +1426,15 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Re-apply any non-loop-dominating offset.
if (PostLoopOffset) {
if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
- const SCEV *const OffsetArray[1] = { PostLoopOffset };
- Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+ if (Result->getType()->isIntegerTy()) {
+ Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
+ const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)};
+ Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base);
+ } else {
+ const SCEV *const OffsetArray[1] = {PostLoopOffset};
+ Result =
+ expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result);
+ }
} else {
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateAdd(Result,
@@ -1773,9 +1822,10 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
///
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
-unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts,
- const TargetTransformInfo *TTI) {
+unsigned
+SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+ const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (auto &I : *L->getHeader()) {
@@ -1800,7 +1850,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// so narrow phis can reuse them.
for (PHINode *Phi : Phis) {
auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
- if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC))
+ if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
return V;
if (!SE.isSCEVable(PN->getType()))
return nullptr;
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index c1f9503..3740039 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -12,243 +12,107 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;
-/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
-/// and now we need to decide whether the user should use the preinc or post-inc
-/// value. If this user should use the post-inc version of the IV, return true.
-///
-/// Choosing wrong here can break dominance properties (if we choose to use the
-/// post-inc value when we cannot) or it can end up adding extra live-ranges to
-/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
-/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
- const Loop *L, DominatorTree *DT) {
- // If the user is in the loop, use the preinc value.
- if (L->contains(User)) return false;
-
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (!LatchBlock)
- return false;
-
- // Ok, the user is outside of the loop. If it is dominated by the latch
- // block, use the post-inc value.
- if (DT->dominates(LatchBlock, User->getParent()))
- return true;
-
- // There is one case we have to be careful of: PHI nodes. These little guys
- // can live in blocks that are not dominated by the latch block, but (since
- // their uses occur in the predecessor block, not the block the PHI lives in)
- // should still use the post-inc value. Check for this case now.
- PHINode *PN = dyn_cast<PHINode>(User);
- if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
-
- // Look at all of the uses of Operand by the PHI node. If any use corresponds
- // to a block that is not dominated by the latch block, give up and use the
- // preincremented value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Operand &&
- !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
- return false;
-
- // Okay, all uses of Operand by PN are in predecessor blocks that really are
- // dominated by the latch block. Use the post-incremented value.
- return true;
-}
+/// TransformKind - Different types of transformations that
+/// TransformForPostIncUse can do.
+enum TransformKind {
+ /// Normalize - Normalize according to the given loops.
+ Normalize,
+ /// Denormalize - Perform the inverse transform on the expression with the
+ /// given loop set.
+ Denormalize
+};
namespace {
-
-/// Hold the state used during post-inc expression transformation, including a
-/// map of transformed expressions.
-class PostIncTransform {
- TransformKind Kind;
- PostIncLoopSet &Loops;
- ScalarEvolution &SE;
- DominatorTree &DT;
-
- DenseMap<const SCEV*, const SCEV*> Transformed;
-
-public:
- PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
- ScalarEvolution &se, DominatorTree &dt):
- Kind(kind), Loops(loops), SE(se), DT(dt) {}
-
- const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
- Value *OperandValToReplace);
-
-protected:
- const SCEV *TransformImpl(const SCEV *S, Instruction *User,
- Value *OperandValToReplace);
+struct NormalizeDenormalizeRewriter
+ : public SCEVRewriteVisitor<NormalizeDenormalizeRewriter> {
+ const TransformKind Kind;
+
+ // NB! Pred is a function_ref. Storing it here is okay only because
+ // we're careful about the lifetime of NormalizeDenormalizeRewriter.
+ const NormalizePredTy Pred;
+
+ NormalizeDenormalizeRewriter(TransformKind Kind, NormalizePredTy Pred,
+ ScalarEvolution &SE)
+ : SCEVRewriteVisitor<NormalizeDenormalizeRewriter>(SE), Kind(Kind),
+ Pred(Pred) {}
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr);
};
-
} // namespace
-/// Implement post-inc transformation for all valid expression types.
-const SCEV *PostIncTransform::
-TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
-
- if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
- const SCEV *O = X->getOperand();
- const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
- if (O != N)
- switch (S->getSCEVType()) {
- case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
- case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
- case scTruncate: return SE.getTruncateExpr(N, S->getType());
- default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
- }
- return S;
+const SCEV *
+NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) {
+ SmallVector<const SCEV *, 8> Operands;
+
+ transform(AR->operands(), std::back_inserter(Operands),
+ [&](const SCEV *Op) { return visit(Op); });
+
+ if (!Pred(AR))
+ return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
+
+ // Normalization and denormalization are fancy names for decrementing and
+ // incrementing a SCEV expression with respect to a set of loops. Since
+ // Pred(AR) has returned true, we know we need to normalize or denormalize AR
+ // with respect to its loop.
+
+ if (Kind == Denormalize) {
+ // Denormalization / "partial increment" is essentially the same as \c
+ // SCEVAddRecExpr::getPostIncExpr. Here we use an explicit loop to make the
+ // symmetry with Normalization clear.
+ for (int i = 0, e = Operands.size() - 1; i < e; i++)
+ Operands[i] = SE.getAddExpr(Operands[i], Operands[i + 1]);
+ } else {
+ assert(Kind == Normalize && "Only two possibilities!");
+
+ // Normalization / "partial decrement" is a bit more subtle. Since
+ // incrementing a SCEV expression (in general) changes the step of the SCEV
+ // expression as well, we cannot use the step of the current expression.
+ // Instead, we have to use the step of the very expression we're trying to
+ // compute!
+ //
+ // We solve the issue by recursively building up the result, starting from
+ // the "least significant" operand in the add recurrence:
+ //
+ // Base case:
+ // Single operand add recurrence. It's its own normalization.
+ //
+ // N-operand case:
+ // {S_{N-1},+,S_{N-2},+,...,+,S_0} = S
+ //
+ // Since the step recurrence of S is {S_{N-2},+,...,+,S_0}, we know its
+ // normalization by induction. We subtract the normalized step
+ // recurrence from S_{N-1} to get the normalization of S.
+
+ for (int i = Operands.size() - 2; i >= 0; i--)
+ Operands[i] = SE.getMinusSCEV(Operands[i], Operands[i + 1]);
}
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // An addrec. This is the interesting part.
- SmallVector<const SCEV *, 8> Operands;
- const Loop *L = AR->getLoop();
- // The addrec conceptually uses its operands at loop entry.
- Instruction *LUser = &L->getHeader()->front();
- // Transform each operand.
- for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
- I != E; ++I) {
- Operands.push_back(TransformSubExpr(*I, LUser, nullptr));
- }
- // Conservatively use AnyWrap until/unless we need FlagNW.
- const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
- switch (Kind) {
- case NormalizeAutodetect:
- // Normalize this SCEV by subtracting the expression for the final step.
- // We only allow affine AddRecs to be normalized, otherwise we would not
- // be able to correctly denormalize.
- // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
- // Normalized form: {-2,+,1,+,2}
- // Denormalized form: {1,+,3,+,2}
- //
- // However, denormalization would use a different step expression than
- // normalization (see getPostIncExpr), generating the wrong final
- // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
- if (AR->isAffine() &&
- IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- Loops.insert(L);
- }
-#if 0
- // This assert is conceptually correct, but ScalarEvolution currently
- // sometimes fails to canonicalize two equal SCEVs to exactly the same
- // form. It's possibly a pessimization when this happens, but it isn't a
- // correctness problem, so disable this assert for now.
- assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
- "SCEV normalization is not invertible!");
-#endif
- break;
- case Normalize:
- // We want to normalize step expression, because otherwise we might not be
- // able to denormalize to the original expression.
- //
- // Here is an example what will happen if we don't normalize step:
- // ORIGINAL ISE:
- // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
- // NORMALIZED ISE:
- // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
- // (100 /u {0,+,1}<%bb16>)}<%bb25>
- // DENORMALIZED BACK ISE:
- // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
- // (100 /u {1,+,1}<%bb16>)}<%bb25>
- // Note that the initial value changes after normalization +
- // denormalization, which isn't correct.
- if (Loops.count(L)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- }
-#if 0
- // See the comment on the assert above.
- assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
- "SCEV normalization is not invertible!");
-#endif
- break;
- case Denormalize:
- // Here we want to normalize step expressions for the same reasons, as
- // stated above.
- if (Loops.count(L)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getAddExpr(Result, TransformedStep);
- }
- break;
- }
- return Result;
- }
-
- if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
- SmallVector<const SCEV *, 8> Operands;
- bool Changed = false;
- // Transform each operand.
- for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
- I != E; ++I) {
- const SCEV *O = *I;
- const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
- Changed |= N != O;
- Operands.push_back(N);
- }
- // If any operand actually changed, return a transformed result.
- if (Changed)
- switch (S->getSCEVType()) {
- case scAddExpr: return SE.getAddExpr(Operands);
- case scMulExpr: return SE.getMulExpr(Operands);
- case scSMaxExpr: return SE.getSMaxExpr(Operands);
- case scUMaxExpr: return SE.getUMaxExpr(Operands);
- default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
- }
- return S;
- }
-
- if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
- const SCEV *LO = X->getLHS();
- const SCEV *RO = X->getRHS();
- const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
- const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
- if (LO != LN || RO != RN)
- return SE.getUDivExpr(LN, RN);
- return S;
- }
-
- llvm_unreachable("Unexpected SCEV kind!");
+ return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
}
-/// Manage recursive transformation across an expression DAG. Revisiting
-/// expressions would lead to exponential recursion.
-const SCEV *PostIncTransform::
-TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
-
- if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
- return S;
-
- const SCEV *Result = Transformed.lookup(S);
- if (Result)
- return Result;
+const SCEV *llvm::normalizeForPostIncUse(const SCEV *S,
+ const PostIncLoopSet &Loops,
+ ScalarEvolution &SE) {
+ auto Pred = [&](const SCEVAddRecExpr *AR) {
+ return Loops.count(AR->getLoop());
+ };
+ return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S);
+}
- Result = TransformImpl(S, User, OperandValToReplace);
- Transformed[S] = Result;
- return Result;
+const SCEV *llvm::normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred,
+ ScalarEvolution &SE) {
+ return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S);
}
-/// Top level driver for transforming an expression DAG into its requested
-/// post-inc form (either "Normalized" or "Denormalized").
-const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
- const SCEV *S,
- Instruction *User,
- Value *OperandValToReplace,
- PostIncLoopSet &Loops,
- ScalarEvolution &SE,
- DominatorTree &DT) {
- PostIncTransform Transform(Kind, Loops, SE, DT);
- return Transform.TransformSubExpr(S, User, OperandValToReplace);
+const SCEV *llvm::denormalizeForPostIncUse(const SCEV *S,
+ const PostIncLoopSet &Loops,
+ ScalarEvolution &SE) {
+ auto Pred = [&](const SCEVAddRecExpr *AR) {
+ return Loops.count(AR->getLoop());
+ };
+ return NormalizeDenormalizeRewriter(Denormalize, Pred, SE).visit(S);
}
diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
index 79dc84e..470f4be 100644
--- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp
+++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp
@@ -195,7 +195,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs.assign(TI.getNumSuccessors(), true);
return;
}
- SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C));
Succs[Case.getSuccessorIndex()] = true;
}
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 112118a..2be5d5c 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -82,24 +83,24 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
- TLI.setUnavailable(LibFunc::ldexp);
- TLI.setUnavailable(LibFunc::ldexpf);
- TLI.setUnavailable(LibFunc::ldexpl);
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
- TLI.setUnavailable(LibFunc::exp10l);
- TLI.setUnavailable(LibFunc::log10);
- TLI.setUnavailable(LibFunc::log10f);
- TLI.setUnavailable(LibFunc::log10l);
+ TLI.setUnavailable(LibFunc_ldexp);
+ TLI.setUnavailable(LibFunc_ldexpf);
+ TLI.setUnavailable(LibFunc_ldexpl);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
+ TLI.setUnavailable(LibFunc_exp10l);
+ TLI.setUnavailable(LibFunc_log10);
+ TLI.setUnavailable(LibFunc_log10f);
+ TLI.setUnavailable(LibFunc_log10l);
}
// There are no library implementations of mempcy and memset for AMD gpus and
// these can be difficult to lower in the backend.
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
- TLI.setUnavailable(LibFunc::memcpy);
- TLI.setUnavailable(LibFunc::memset);
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memcpy);
+ TLI.setUnavailable(LibFunc_memset);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
return;
}
@@ -107,21 +108,21 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// All versions of watchOS support it.
if (T.isMacOSX()) {
if (T.isMacOSXVersionLT(10, 5))
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
} else if (T.isiOS()) {
if (T.isOSVersionLT(3, 0))
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
} else if (!T.isWatchOS()) {
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
}
if (!hasSinCosPiStret(T)) {
- TLI.setUnavailable(LibFunc::sinpi);
- TLI.setUnavailable(LibFunc::sinpif);
- TLI.setUnavailable(LibFunc::cospi);
- TLI.setUnavailable(LibFunc::cospif);
- TLI.setUnavailable(LibFunc::sincospi_stret);
- TLI.setUnavailable(LibFunc::sincospif_stret);
+ TLI.setUnavailable(LibFunc_sinpi);
+ TLI.setUnavailable(LibFunc_sinpif);
+ TLI.setUnavailable(LibFunc_cospi);
+ TLI.setUnavailable(LibFunc_cospif);
+ TLI.setUnavailable(LibFunc_sincospi_stret);
+ TLI.setUnavailable(LibFunc_sincospif_stret);
}
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
@@ -131,179 +132,223 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// has a $UNIX2003 suffix. The two implementations are identical except
// for the return value in some edge cases. However, we don't want to
// generate code that depends on the old symbols.
- TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
- TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ TLI.setAvailableWithName(LibFunc_fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003");
}
// iprintf and friends are only available on XCore and TCE.
if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
- TLI.setUnavailable(LibFunc::iprintf);
- TLI.setUnavailable(LibFunc::siprintf);
- TLI.setUnavailable(LibFunc::fiprintf);
+ TLI.setUnavailable(LibFunc_iprintf);
+ TLI.setUnavailable(LibFunc_siprintf);
+ TLI.setUnavailable(LibFunc_fiprintf);
}
if (T.isOSWindows() && !T.isOSCygMing()) {
// Win32 does not support long double
- TLI.setUnavailable(LibFunc::acosl);
- TLI.setUnavailable(LibFunc::asinl);
- TLI.setUnavailable(LibFunc::atanl);
- TLI.setUnavailable(LibFunc::atan2l);
- TLI.setUnavailable(LibFunc::ceill);
- TLI.setUnavailable(LibFunc::copysignl);
- TLI.setUnavailable(LibFunc::cosl);
- TLI.setUnavailable(LibFunc::coshl);
- TLI.setUnavailable(LibFunc::expl);
- TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
- TLI.setUnavailable(LibFunc::fabsl);
- TLI.setUnavailable(LibFunc::floorl);
- TLI.setUnavailable(LibFunc::fmaxl);
- TLI.setUnavailable(LibFunc::fminl);
- TLI.setUnavailable(LibFunc::fmodl);
- TLI.setUnavailable(LibFunc::frexpl);
- TLI.setUnavailable(LibFunc::ldexpf);
- TLI.setUnavailable(LibFunc::ldexpl);
- TLI.setUnavailable(LibFunc::logl);
- TLI.setUnavailable(LibFunc::modfl);
- TLI.setUnavailable(LibFunc::powl);
- TLI.setUnavailable(LibFunc::sinl);
- TLI.setUnavailable(LibFunc::sinhl);
- TLI.setUnavailable(LibFunc::sqrtl);
- TLI.setUnavailable(LibFunc::tanl);
- TLI.setUnavailable(LibFunc::tanhl);
+ TLI.setUnavailable(LibFunc_acosl);
+ TLI.setUnavailable(LibFunc_asinl);
+ TLI.setUnavailable(LibFunc_atanl);
+ TLI.setUnavailable(LibFunc_atan2l);
+ TLI.setUnavailable(LibFunc_ceill);
+ TLI.setUnavailable(LibFunc_copysignl);
+ TLI.setUnavailable(LibFunc_cosl);
+ TLI.setUnavailable(LibFunc_coshl);
+ TLI.setUnavailable(LibFunc_expl);
+ TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
+ TLI.setUnavailable(LibFunc_fabsl);
+ TLI.setUnavailable(LibFunc_floorl);
+ TLI.setUnavailable(LibFunc_fmaxl);
+ TLI.setUnavailable(LibFunc_fminl);
+ TLI.setUnavailable(LibFunc_fmodl);
+ TLI.setUnavailable(LibFunc_frexpl);
+ TLI.setUnavailable(LibFunc_ldexpf);
+ TLI.setUnavailable(LibFunc_ldexpl);
+ TLI.setUnavailable(LibFunc_logl);
+ TLI.setUnavailable(LibFunc_modfl);
+ TLI.setUnavailable(LibFunc_powl);
+ TLI.setUnavailable(LibFunc_sinl);
+ TLI.setUnavailable(LibFunc_sinhl);
+ TLI.setUnavailable(LibFunc_sqrtl);
+ TLI.setUnavailable(LibFunc_tanl);
+ TLI.setUnavailable(LibFunc_tanhl);
// Win32 only has C89 math
- TLI.setUnavailable(LibFunc::acosh);
- TLI.setUnavailable(LibFunc::acoshf);
- TLI.setUnavailable(LibFunc::acoshl);
- TLI.setUnavailable(LibFunc::asinh);
- TLI.setUnavailable(LibFunc::asinhf);
- TLI.setUnavailable(LibFunc::asinhl);
- TLI.setUnavailable(LibFunc::atanh);
- TLI.setUnavailable(LibFunc::atanhf);
- TLI.setUnavailable(LibFunc::atanhl);
- TLI.setUnavailable(LibFunc::cbrt);
- TLI.setUnavailable(LibFunc::cbrtf);
- TLI.setUnavailable(LibFunc::cbrtl);
- TLI.setUnavailable(LibFunc::exp2);
- TLI.setUnavailable(LibFunc::exp2f);
- TLI.setUnavailable(LibFunc::exp2l);
- TLI.setUnavailable(LibFunc::expm1);
- TLI.setUnavailable(LibFunc::expm1f);
- TLI.setUnavailable(LibFunc::expm1l);
- TLI.setUnavailable(LibFunc::log2);
- TLI.setUnavailable(LibFunc::log2f);
- TLI.setUnavailable(LibFunc::log2l);
- TLI.setUnavailable(LibFunc::log1p);
- TLI.setUnavailable(LibFunc::log1pf);
- TLI.setUnavailable(LibFunc::log1pl);
- TLI.setUnavailable(LibFunc::logb);
- TLI.setUnavailable(LibFunc::logbf);
- TLI.setUnavailable(LibFunc::logbl);
- TLI.setUnavailable(LibFunc::nearbyint);
- TLI.setUnavailable(LibFunc::nearbyintf);
- TLI.setUnavailable(LibFunc::nearbyintl);
- TLI.setUnavailable(LibFunc::rint);
- TLI.setUnavailable(LibFunc::rintf);
- TLI.setUnavailable(LibFunc::rintl);
- TLI.setUnavailable(LibFunc::round);
- TLI.setUnavailable(LibFunc::roundf);
- TLI.setUnavailable(LibFunc::roundl);
- TLI.setUnavailable(LibFunc::trunc);
- TLI.setUnavailable(LibFunc::truncf);
- TLI.setUnavailable(LibFunc::truncl);
+ TLI.setUnavailable(LibFunc_acosh);
+ TLI.setUnavailable(LibFunc_acoshf);
+ TLI.setUnavailable(LibFunc_acoshl);
+ TLI.setUnavailable(LibFunc_asinh);
+ TLI.setUnavailable(LibFunc_asinhf);
+ TLI.setUnavailable(LibFunc_asinhl);
+ TLI.setUnavailable(LibFunc_atanh);
+ TLI.setUnavailable(LibFunc_atanhf);
+ TLI.setUnavailable(LibFunc_atanhl);
+ TLI.setUnavailable(LibFunc_cbrt);
+ TLI.setUnavailable(LibFunc_cbrtf);
+ TLI.setUnavailable(LibFunc_cbrtl);
+ TLI.setUnavailable(LibFunc_exp2);
+ TLI.setUnavailable(LibFunc_exp2f);
+ TLI.setUnavailable(LibFunc_exp2l);
+ TLI.setUnavailable(LibFunc_expm1);
+ TLI.setUnavailable(LibFunc_expm1f);
+ TLI.setUnavailable(LibFunc_expm1l);
+ TLI.setUnavailable(LibFunc_log2);
+ TLI.setUnavailable(LibFunc_log2f);
+ TLI.setUnavailable(LibFunc_log2l);
+ TLI.setUnavailable(LibFunc_log1p);
+ TLI.setUnavailable(LibFunc_log1pf);
+ TLI.setUnavailable(LibFunc_log1pl);
+ TLI.setUnavailable(LibFunc_logb);
+ TLI.setUnavailable(LibFunc_logbf);
+ TLI.setUnavailable(LibFunc_logbl);
+ TLI.setUnavailable(LibFunc_nearbyint);
+ TLI.setUnavailable(LibFunc_nearbyintf);
+ TLI.setUnavailable(LibFunc_nearbyintl);
+ TLI.setUnavailable(LibFunc_rint);
+ TLI.setUnavailable(LibFunc_rintf);
+ TLI.setUnavailable(LibFunc_rintl);
+ TLI.setUnavailable(LibFunc_round);
+ TLI.setUnavailable(LibFunc_roundf);
+ TLI.setUnavailable(LibFunc_roundl);
+ TLI.setUnavailable(LibFunc_trunc);
+ TLI.setUnavailable(LibFunc_truncf);
+ TLI.setUnavailable(LibFunc_truncl);
// Win32 provides some C99 math with mangled names
- TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+ TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
if (T.getArch() == Triple::x86) {
// Win32 on x86 implements single-precision math functions as macros
- TLI.setUnavailable(LibFunc::acosf);
- TLI.setUnavailable(LibFunc::asinf);
- TLI.setUnavailable(LibFunc::atanf);
- TLI.setUnavailable(LibFunc::atan2f);
- TLI.setUnavailable(LibFunc::ceilf);
- TLI.setUnavailable(LibFunc::copysignf);
- TLI.setUnavailable(LibFunc::cosf);
- TLI.setUnavailable(LibFunc::coshf);
- TLI.setUnavailable(LibFunc::expf);
- TLI.setUnavailable(LibFunc::floorf);
- TLI.setUnavailable(LibFunc::fminf);
- TLI.setUnavailable(LibFunc::fmaxf);
- TLI.setUnavailable(LibFunc::fmodf);
- TLI.setUnavailable(LibFunc::logf);
- TLI.setUnavailable(LibFunc::log10f);
- TLI.setUnavailable(LibFunc::modff);
- TLI.setUnavailable(LibFunc::powf);
- TLI.setUnavailable(LibFunc::sinf);
- TLI.setUnavailable(LibFunc::sinhf);
- TLI.setUnavailable(LibFunc::sqrtf);
- TLI.setUnavailable(LibFunc::tanf);
- TLI.setUnavailable(LibFunc::tanhf);
+ TLI.setUnavailable(LibFunc_acosf);
+ TLI.setUnavailable(LibFunc_asinf);
+ TLI.setUnavailable(LibFunc_atanf);
+ TLI.setUnavailable(LibFunc_atan2f);
+ TLI.setUnavailable(LibFunc_ceilf);
+ TLI.setUnavailable(LibFunc_copysignf);
+ TLI.setUnavailable(LibFunc_cosf);
+ TLI.setUnavailable(LibFunc_coshf);
+ TLI.setUnavailable(LibFunc_expf);
+ TLI.setUnavailable(LibFunc_floorf);
+ TLI.setUnavailable(LibFunc_fminf);
+ TLI.setUnavailable(LibFunc_fmaxf);
+ TLI.setUnavailable(LibFunc_fmodf);
+ TLI.setUnavailable(LibFunc_logf);
+ TLI.setUnavailable(LibFunc_log10f);
+ TLI.setUnavailable(LibFunc_modff);
+ TLI.setUnavailable(LibFunc_powf);
+ TLI.setUnavailable(LibFunc_sinf);
+ TLI.setUnavailable(LibFunc_sinhf);
+ TLI.setUnavailable(LibFunc_sqrtf);
+ TLI.setUnavailable(LibFunc_tanf);
+ TLI.setUnavailable(LibFunc_tanhf);
}
+ // These definitions are due to math-finite.h header on Linux
+ TLI.setUnavailable(LibFunc_acos_finite);
+ TLI.setUnavailable(LibFunc_acosf_finite);
+ TLI.setUnavailable(LibFunc_acosl_finite);
+ TLI.setUnavailable(LibFunc_acosh_finite);
+ TLI.setUnavailable(LibFunc_acoshf_finite);
+ TLI.setUnavailable(LibFunc_acoshl_finite);
+ TLI.setUnavailable(LibFunc_asin_finite);
+ TLI.setUnavailable(LibFunc_asinf_finite);
+ TLI.setUnavailable(LibFunc_asinl_finite);
+ TLI.setUnavailable(LibFunc_atan2_finite);
+ TLI.setUnavailable(LibFunc_atan2f_finite);
+ TLI.setUnavailable(LibFunc_atan2l_finite);
+ TLI.setUnavailable(LibFunc_atanh_finite);
+ TLI.setUnavailable(LibFunc_atanhf_finite);
+ TLI.setUnavailable(LibFunc_atanhl_finite);
+ TLI.setUnavailable(LibFunc_cosh_finite);
+ TLI.setUnavailable(LibFunc_coshf_finite);
+ TLI.setUnavailable(LibFunc_coshl_finite);
+ TLI.setUnavailable(LibFunc_exp10_finite);
+ TLI.setUnavailable(LibFunc_exp10f_finite);
+ TLI.setUnavailable(LibFunc_exp10l_finite);
+ TLI.setUnavailable(LibFunc_exp2_finite);
+ TLI.setUnavailable(LibFunc_exp2f_finite);
+ TLI.setUnavailable(LibFunc_exp2l_finite);
+ TLI.setUnavailable(LibFunc_exp_finite);
+ TLI.setUnavailable(LibFunc_expf_finite);
+ TLI.setUnavailable(LibFunc_expl_finite);
+ TLI.setUnavailable(LibFunc_log10_finite);
+ TLI.setUnavailable(LibFunc_log10f_finite);
+ TLI.setUnavailable(LibFunc_log10l_finite);
+ TLI.setUnavailable(LibFunc_log2_finite);
+ TLI.setUnavailable(LibFunc_log2f_finite);
+ TLI.setUnavailable(LibFunc_log2l_finite);
+ TLI.setUnavailable(LibFunc_log_finite);
+ TLI.setUnavailable(LibFunc_logf_finite);
+ TLI.setUnavailable(LibFunc_logl_finite);
+ TLI.setUnavailable(LibFunc_pow_finite);
+ TLI.setUnavailable(LibFunc_powf_finite);
+ TLI.setUnavailable(LibFunc_powl_finite);
+ TLI.setUnavailable(LibFunc_sinh_finite);
+ TLI.setUnavailable(LibFunc_sinhf_finite);
+ TLI.setUnavailable(LibFunc_sinhl_finite);
+
// Win32 does *not* provide provide these functions, but they are
// generally available on POSIX-compliant systems:
- TLI.setUnavailable(LibFunc::access);
- TLI.setUnavailable(LibFunc::bcmp);
- TLI.setUnavailable(LibFunc::bcopy);
- TLI.setUnavailable(LibFunc::bzero);
- TLI.setUnavailable(LibFunc::chmod);
- TLI.setUnavailable(LibFunc::chown);
- TLI.setUnavailable(LibFunc::closedir);
- TLI.setUnavailable(LibFunc::ctermid);
- TLI.setUnavailable(LibFunc::fdopen);
- TLI.setUnavailable(LibFunc::ffs);
- TLI.setUnavailable(LibFunc::fileno);
- TLI.setUnavailable(LibFunc::flockfile);
- TLI.setUnavailable(LibFunc::fseeko);
- TLI.setUnavailable(LibFunc::fstat);
- TLI.setUnavailable(LibFunc::fstatvfs);
- TLI.setUnavailable(LibFunc::ftello);
- TLI.setUnavailable(LibFunc::ftrylockfile);
- TLI.setUnavailable(LibFunc::funlockfile);
- TLI.setUnavailable(LibFunc::getc_unlocked);
- TLI.setUnavailable(LibFunc::getitimer);
- TLI.setUnavailable(LibFunc::getlogin_r);
- TLI.setUnavailable(LibFunc::getpwnam);
- TLI.setUnavailable(LibFunc::gettimeofday);
- TLI.setUnavailable(LibFunc::htonl);
- TLI.setUnavailable(LibFunc::htons);
- TLI.setUnavailable(LibFunc::lchown);
- TLI.setUnavailable(LibFunc::lstat);
- TLI.setUnavailable(LibFunc::memccpy);
- TLI.setUnavailable(LibFunc::mkdir);
- TLI.setUnavailable(LibFunc::ntohl);
- TLI.setUnavailable(LibFunc::ntohs);
- TLI.setUnavailable(LibFunc::open);
- TLI.setUnavailable(LibFunc::opendir);
- TLI.setUnavailable(LibFunc::pclose);
- TLI.setUnavailable(LibFunc::popen);
- TLI.setUnavailable(LibFunc::pread);
- TLI.setUnavailable(LibFunc::pwrite);
- TLI.setUnavailable(LibFunc::read);
- TLI.setUnavailable(LibFunc::readlink);
- TLI.setUnavailable(LibFunc::realpath);
- TLI.setUnavailable(LibFunc::rmdir);
- TLI.setUnavailable(LibFunc::setitimer);
- TLI.setUnavailable(LibFunc::stat);
- TLI.setUnavailable(LibFunc::statvfs);
- TLI.setUnavailable(LibFunc::stpcpy);
- TLI.setUnavailable(LibFunc::stpncpy);
- TLI.setUnavailable(LibFunc::strcasecmp);
- TLI.setUnavailable(LibFunc::strncasecmp);
- TLI.setUnavailable(LibFunc::times);
- TLI.setUnavailable(LibFunc::uname);
- TLI.setUnavailable(LibFunc::unlink);
- TLI.setUnavailable(LibFunc::unsetenv);
- TLI.setUnavailable(LibFunc::utime);
- TLI.setUnavailable(LibFunc::utimes);
- TLI.setUnavailable(LibFunc::write);
+ TLI.setUnavailable(LibFunc_access);
+ TLI.setUnavailable(LibFunc_bcmp);
+ TLI.setUnavailable(LibFunc_bcopy);
+ TLI.setUnavailable(LibFunc_bzero);
+ TLI.setUnavailable(LibFunc_chmod);
+ TLI.setUnavailable(LibFunc_chown);
+ TLI.setUnavailable(LibFunc_closedir);
+ TLI.setUnavailable(LibFunc_ctermid);
+ TLI.setUnavailable(LibFunc_fdopen);
+ TLI.setUnavailable(LibFunc_ffs);
+ TLI.setUnavailable(LibFunc_fileno);
+ TLI.setUnavailable(LibFunc_flockfile);
+ TLI.setUnavailable(LibFunc_fseeko);
+ TLI.setUnavailable(LibFunc_fstat);
+ TLI.setUnavailable(LibFunc_fstatvfs);
+ TLI.setUnavailable(LibFunc_ftello);
+ TLI.setUnavailable(LibFunc_ftrylockfile);
+ TLI.setUnavailable(LibFunc_funlockfile);
+ TLI.setUnavailable(LibFunc_getc_unlocked);
+ TLI.setUnavailable(LibFunc_getitimer);
+ TLI.setUnavailable(LibFunc_getlogin_r);
+ TLI.setUnavailable(LibFunc_getpwnam);
+ TLI.setUnavailable(LibFunc_gettimeofday);
+ TLI.setUnavailable(LibFunc_htonl);
+ TLI.setUnavailable(LibFunc_htons);
+ TLI.setUnavailable(LibFunc_lchown);
+ TLI.setUnavailable(LibFunc_lstat);
+ TLI.setUnavailable(LibFunc_memccpy);
+ TLI.setUnavailable(LibFunc_mkdir);
+ TLI.setUnavailable(LibFunc_ntohl);
+ TLI.setUnavailable(LibFunc_ntohs);
+ TLI.setUnavailable(LibFunc_open);
+ TLI.setUnavailable(LibFunc_opendir);
+ TLI.setUnavailable(LibFunc_pclose);
+ TLI.setUnavailable(LibFunc_popen);
+ TLI.setUnavailable(LibFunc_pread);
+ TLI.setUnavailable(LibFunc_pwrite);
+ TLI.setUnavailable(LibFunc_read);
+ TLI.setUnavailable(LibFunc_readlink);
+ TLI.setUnavailable(LibFunc_realpath);
+ TLI.setUnavailable(LibFunc_rmdir);
+ TLI.setUnavailable(LibFunc_setitimer);
+ TLI.setUnavailable(LibFunc_stat);
+ TLI.setUnavailable(LibFunc_statvfs);
+ TLI.setUnavailable(LibFunc_stpcpy);
+ TLI.setUnavailable(LibFunc_stpncpy);
+ TLI.setUnavailable(LibFunc_strcasecmp);
+ TLI.setUnavailable(LibFunc_strncasecmp);
+ TLI.setUnavailable(LibFunc_times);
+ TLI.setUnavailable(LibFunc_uname);
+ TLI.setUnavailable(LibFunc_unlink);
+ TLI.setUnavailable(LibFunc_unsetenv);
+ TLI.setUnavailable(LibFunc_utime);
+ TLI.setUnavailable(LibFunc_utimes);
+ TLI.setUnavailable(LibFunc_write);
// Win32 does *not* provide provide these functions, but they are
// specified by C99:
- TLI.setUnavailable(LibFunc::atoll);
- TLI.setUnavailable(LibFunc::frexpf);
- TLI.setUnavailable(LibFunc::llabs);
+ TLI.setUnavailable(LibFunc_atoll);
+ TLI.setUnavailable(LibFunc_frexpf);
+ TLI.setUnavailable(LibFunc_llabs);
}
switch (T.getOS()) {
@@ -311,28 +356,28 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
// and their names are __exp10 and __exp10f. exp10l is not available on
// OS X or iOS.
- TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc_exp10l);
if (T.isMacOSXVersionLT(10, 9)) {
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
} else {
- TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
- TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ TLI.setAvailableWithName(LibFunc_exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc_exp10f, "__exp10f");
}
break;
case Triple::IOS:
case Triple::TvOS:
case Triple::WatchOS:
- TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc_exp10l);
if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
(T.isOSVersionLT(9, 0) &&
(T.getArch() == Triple::x86 ||
T.getArch() == Triple::x86_64)))) {
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
} else {
- TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
- TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ TLI.setAvailableWithName(LibFunc_exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc_exp10f, "__exp10f");
}
break;
case Triple::Linux:
@@ -344,9 +389,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// Fall through to disable all of them.
LLVM_FALLTHROUGH;
default:
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
- TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
+ TLI.setUnavailable(LibFunc_exp10l);
}
// ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
@@ -364,7 +409,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
case Triple::Linux:
break;
default:
- TLI.setUnavailable(LibFunc::ffsl);
+ TLI.setUnavailable(LibFunc_ffsl);
}
// ffsll is available on at least FreeBSD and Linux (GLIBC):
@@ -380,7 +425,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
case Triple::Linux:
break;
default:
- TLI.setUnavailable(LibFunc::ffsll);
+ TLI.setUnavailable(LibFunc_ffsll);
}
// The following functions are available on at least FreeBSD:
@@ -388,30 +433,30 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// http://svn.freebsd.org/base/head/lib/libc/string/flsl.c
// http://svn.freebsd.org/base/head/lib/libc/string/flsll.c
if (!T.isOSFreeBSD()) {
- TLI.setUnavailable(LibFunc::fls);
- TLI.setUnavailable(LibFunc::flsl);
- TLI.setUnavailable(LibFunc::flsll);
+ TLI.setUnavailable(LibFunc_fls);
+ TLI.setUnavailable(LibFunc_flsl);
+ TLI.setUnavailable(LibFunc_flsll);
}
// The following functions are available on at least Linux:
if (!T.isOSLinux()) {
- TLI.setUnavailable(LibFunc::dunder_strdup);
- TLI.setUnavailable(LibFunc::dunder_strtok_r);
- TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
- TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
- TLI.setUnavailable(LibFunc::under_IO_getc);
- TLI.setUnavailable(LibFunc::under_IO_putc);
- TLI.setUnavailable(LibFunc::memalign);
- TLI.setUnavailable(LibFunc::fopen64);
- TLI.setUnavailable(LibFunc::fseeko64);
- TLI.setUnavailable(LibFunc::fstat64);
- TLI.setUnavailable(LibFunc::fstatvfs64);
- TLI.setUnavailable(LibFunc::ftello64);
- TLI.setUnavailable(LibFunc::lstat64);
- TLI.setUnavailable(LibFunc::open64);
- TLI.setUnavailable(LibFunc::stat64);
- TLI.setUnavailable(LibFunc::statvfs64);
- TLI.setUnavailable(LibFunc::tmpfile64);
+ TLI.setUnavailable(LibFunc_dunder_strdup);
+ TLI.setUnavailable(LibFunc_dunder_strtok_r);
+ TLI.setUnavailable(LibFunc_dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc_under_IO_getc);
+ TLI.setUnavailable(LibFunc_under_IO_putc);
+ TLI.setUnavailable(LibFunc_memalign);
+ TLI.setUnavailable(LibFunc_fopen64);
+ TLI.setUnavailable(LibFunc_fseeko64);
+ TLI.setUnavailable(LibFunc_fstat64);
+ TLI.setUnavailable(LibFunc_fstatvfs64);
+ TLI.setUnavailable(LibFunc_ftello64);
+ TLI.setUnavailable(LibFunc_lstat64);
+ TLI.setUnavailable(LibFunc_open64);
+ TLI.setUnavailable(LibFunc_stat64);
+ TLI.setUnavailable(LibFunc_statvfs64);
+ TLI.setUnavailable(LibFunc_tmpfile64);
}
// As currently implemented in clang, NVPTX code has no standard library to
@@ -427,9 +472,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// optimizations, so this situation should be fixed.
if (T.isNVPTX()) {
TLI.disableAllFunctions();
- TLI.setAvailable(LibFunc::nvvm_reflect);
+ TLI.setAvailable(LibFunc_nvvm_reflect);
} else {
- TLI.setUnavailable(LibFunc::nvvm_reflect);
+ TLI.setUnavailable(LibFunc_nvvm_reflect);
}
TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
@@ -496,13 +541,13 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
// Check for \01 prefix that is used to mangle __asm declarations and
// strip it if present.
- return GlobalValue::getRealLinkageName(funcName);
+ return GlobalValue::dropLLVMManglingEscape(funcName);
}
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
- LibFunc::Func &F) const {
+ LibFunc &F) const {
StringRef const *Start = &StandardNames[0];
- StringRef const *End = &StandardNames[LibFunc::NumLibFuncs];
+ StringRef const *End = &StandardNames[NumLibFuncs];
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
@@ -513,14 +558,14 @@ bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
return LHS < RHS;
});
if (I != End && *I == funcName) {
- F = (LibFunc::Func)(I - Start);
+ F = (LibFunc)(I - Start);
return true;
}
return false;
}
bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
- LibFunc::Func F,
+ LibFunc F,
const DataLayout *DL) const {
LLVMContext &Ctx = FTy.getContext();
Type *PCharTy = Type::getInt8PtrTy(Ctx);
@@ -531,504 +576,706 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
unsigned NumParams = FTy.getNumParams();
switch (F) {
- case LibFunc::strlen:
+ case LibFunc_strlen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy());
- case LibFunc::strchr:
- case LibFunc::strrchr:
+ case LibFunc_strchr:
+ case LibFunc_strrchr:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1)->isIntegerTy());
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
return ((NumParams == 2 || NumParams == 3) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strcat:
+ case LibFunc_strcat:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType());
- case LibFunc::strncat:
+ case LibFunc_strncat:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType() &&
FTy.getParamType(2)->isIntegerTy());
- case LibFunc::strcpy_chk:
- case LibFunc::stpcpy_chk:
+ case LibFunc_strcpy_chk:
+ case LibFunc_stpcpy_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
+ case LibFunc_strcpy:
+ case LibFunc_stpcpy:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy);
- case LibFunc::strncpy_chk:
- case LibFunc::stpncpy_chk:
+ case LibFunc_strncpy_chk:
+ case LibFunc_stpncpy_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
+ case LibFunc_strncpy:
+ case LibFunc_stpncpy:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy &&
FTy.getParamType(2)->isIntegerTy());
- case LibFunc::strxfrm:
+ case LibFunc_strxfrm:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
return (NumParams == 2 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1));
- case LibFunc::strncmp:
+ case LibFunc_strncmp:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(2)->isIntegerTy());
- case LibFunc::strspn:
- case LibFunc::strcspn:
+ case LibFunc_strspn:
+ case LibFunc_strcspn:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getReturnType()->isIntegerTy());
- case LibFunc::strcoll:
- case LibFunc::strcasecmp:
- case LibFunc::strncasecmp:
+ case LibFunc_strcoll:
+ case LibFunc_strcasecmp:
+ case LibFunc_strncasecmp:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strstr:
+ case LibFunc_strstr:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strpbrk:
+ case LibFunc_strpbrk:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1));
- case LibFunc::strtok:
- case LibFunc::strtok_r:
+ case LibFunc_strtok:
+ case LibFunc_strtok_r:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::scanf:
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
+ case LibFunc_scanf:
+ case LibFunc_setbuf:
+ case LibFunc_setvbuf:
return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::strdup:
- case LibFunc::strndup:
+ case LibFunc_strdup:
+ case LibFunc_strndup:
return (NumParams >= 1 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy());
- case LibFunc::sscanf:
- case LibFunc::stat:
- case LibFunc::statvfs:
- case LibFunc::sprintf:
+ case LibFunc_sscanf:
+ case LibFunc_stat:
+ case LibFunc_statvfs:
+ case LibFunc_siprintf:
+ case LibFunc_sprintf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::snprintf:
+ case LibFunc_snprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::setitimer:
+ case LibFunc_setitimer:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::system:
+ case LibFunc_system:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::malloc:
+ case LibFunc_malloc:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
- case LibFunc::memcmp:
- return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(1)->isPointerTy() &&
- FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_memcmp:
+ return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy());
- case LibFunc::memchr:
- case LibFunc::memrchr:
- return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
+ return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
+ FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(1)->isIntegerTy(32) &&
- FTy.getParamType(2)->isIntegerTy() &&
- FTy.getReturnType()->isPointerTy());
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
+ IsSizeTTy(FTy.getParamType(2)));
+ case LibFunc_modf:
+ case LibFunc_modff:
+ case LibFunc_modfl:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::memcpy_chk:
- case LibFunc::memmove_chk:
+ case LibFunc_memcpy_chk:
+ case LibFunc_memmove_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::memcpy:
- case LibFunc::mempcpy:
- case LibFunc::memmove:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memmove:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
IsSizeTTy(FTy.getParamType(2)));
- case LibFunc::memset_chk:
+ case LibFunc_memset_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::memset:
+ case LibFunc_memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
IsSizeTTy(FTy.getParamType(2)));
- case LibFunc::memccpy:
+ case LibFunc_memccpy:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::memalign:
+ case LibFunc_memalign:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::realloc:
- return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getReturnType()->isPointerTy());
- case LibFunc::read:
+ case LibFunc_realloc:
+ case LibFunc_reallocf:
+ return (NumParams == 2 && FTy.getReturnType() == PCharTy &&
+ FTy.getParamType(0) == FTy.getReturnType() &&
+ IsSizeTTy(FTy.getParamType(1)));
+ case LibFunc_read:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::rewind:
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
+ case LibFunc_rewind:
+ case LibFunc_rmdir:
+ case LibFunc_remove:
+ case LibFunc_realpath:
return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::rename:
+ case LibFunc_rename:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::readlink:
+ case LibFunc_readlink:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::write:
+ case LibFunc_write:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::bcopy:
- case LibFunc::bcmp:
+ case LibFunc_bcopy:
+ case LibFunc_bcmp:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::bzero:
+ case LibFunc_bzero:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::calloc:
+ case LibFunc_calloc:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
- case LibFunc::atof:
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atoll:
- case LibFunc::ferror:
- case LibFunc::getenv:
- case LibFunc::getpwnam:
- case LibFunc::pclose:
- case LibFunc::perror:
- case LibFunc::printf:
- case LibFunc::puts:
- case LibFunc::uname:
- case LibFunc::under_IO_getc:
- case LibFunc::unlink:
- case LibFunc::unsetenv:
+ case LibFunc_atof:
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atoll:
+ case LibFunc_ferror:
+ case LibFunc_getenv:
+ case LibFunc_getpwnam:
+ case LibFunc_iprintf:
+ case LibFunc_pclose:
+ case LibFunc_perror:
+ case LibFunc_printf:
+ case LibFunc_puts:
+ case LibFunc_uname:
+ case LibFunc_under_IO_getc:
+ case LibFunc_unlink:
+ case LibFunc_unsetenv:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::chmod:
- case LibFunc::chown:
- case LibFunc::clearerr:
- case LibFunc::closedir:
- case LibFunc::ctermid:
- case LibFunc::fclose:
- case LibFunc::feof:
- case LibFunc::fflush:
- case LibFunc::fgetc:
- case LibFunc::fileno:
- case LibFunc::flockfile:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::fseeko64:
- case LibFunc::fseeko:
- case LibFunc::fsetpos:
- case LibFunc::ftell:
- case LibFunc::ftello64:
- case LibFunc::ftello:
- case LibFunc::ftrylockfile:
- case LibFunc::funlockfile:
- case LibFunc::getc:
- case LibFunc::getc_unlocked:
- case LibFunc::getlogin_r:
- case LibFunc::mkdir:
- case LibFunc::mktime:
- case LibFunc::times:
+ case LibFunc_access:
+ case LibFunc_chmod:
+ case LibFunc_chown:
+ case LibFunc_clearerr:
+ case LibFunc_closedir:
+ case LibFunc_ctermid:
+ case LibFunc_fclose:
+ case LibFunc_feof:
+ case LibFunc_fflush:
+ case LibFunc_fgetc:
+ case LibFunc_fileno:
+ case LibFunc_flockfile:
+ case LibFunc_free:
+ case LibFunc_fseek:
+ case LibFunc_fseeko64:
+ case LibFunc_fseeko:
+ case LibFunc_fsetpos:
+ case LibFunc_ftell:
+ case LibFunc_ftello64:
+ case LibFunc_ftello:
+ case LibFunc_ftrylockfile:
+ case LibFunc_funlockfile:
+ case LibFunc_getc:
+ case LibFunc_getc_unlocked:
+ case LibFunc_getlogin_r:
+ case LibFunc_mkdir:
+ case LibFunc_mktime:
+ case LibFunc_times:
return (NumParams != 0 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::access:
- return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::fopen:
+ case LibFunc_fopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fdopen:
+ case LibFunc_fdopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
+ case LibFunc_fputc:
+ case LibFunc_fstat:
+ case LibFunc_frexp:
+ case LibFunc_frexpf:
+ case LibFunc_frexpl:
+ case LibFunc_fstatvfs:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::fgets:
+ case LibFunc_fgets:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::fread:
+ case LibFunc_fread:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(3)->isPointerTy());
- case LibFunc::fwrite:
+ case LibFunc_fwrite:
return (NumParams == 4 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
FTy.getParamType(2)->isIntegerTy() &&
FTy.getParamType(3)->isPointerTy());
- case LibFunc::fputs:
+ case LibFunc_fputs:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fscanf:
- case LibFunc::fprintf:
- return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
+ case LibFunc_fscanf:
+ case LibFunc_fiprintf:
+ case LibFunc_fprintf:
+ return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() &&
+ FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fgetpos:
+ case LibFunc_fgetpos:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::gets:
- case LibFunc::getchar:
- case LibFunc::getitimer:
+ case LibFunc_getchar:
+ return (NumParams == 0 && FTy.getReturnType()->isIntegerTy());
+ case LibFunc_gets:
+ return (NumParams == 1 && FTy.getParamType(0) == PCharTy);
+ case LibFunc_getitimer:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::ungetc:
+ case LibFunc_ungetc:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::utime:
- case LibFunc::utimes:
+ case LibFunc_utime:
+ case LibFunc_utimes:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::putc:
+ case LibFunc_putc:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::pread:
- case LibFunc::pwrite:
+ case LibFunc_pread:
+ case LibFunc_pwrite:
return (NumParams == 4 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::popen:
+ case LibFunc_popen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::vscanf:
+ case LibFunc_vscanf:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::vsscanf:
+ case LibFunc_vsscanf:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::vfscanf:
+ case LibFunc_vfscanf:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::valloc:
+ case LibFunc_valloc:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::vprintf:
+ case LibFunc_vprintf:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
+ case LibFunc_vfprintf:
+ case LibFunc_vsprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::vsnprintf:
+ case LibFunc_vsnprintf:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::open:
+ case LibFunc_open:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::opendir:
+ case LibFunc_opendir:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy());
- case LibFunc::tmpfile:
+ case LibFunc_tmpfile:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
- case LibFunc::lstat:
+ case LibFunc_htonl:
+ case LibFunc_ntohl:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getReturnType() == FTy.getParamType(0));
+ case LibFunc_htons:
+ case LibFunc_ntohs:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(16) &&
+ FTy.getReturnType() == FTy.getParamType(0));
+ case LibFunc_lstat:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::lchown:
+ case LibFunc_lchown:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::qsort:
+ case LibFunc_qsort:
return (NumParams == 4 && FTy.getParamType(3)->isPointerTy());
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
+ case LibFunc_dunder_strdup:
+ case LibFunc_dunder_strndup:
return (NumParams >= 1 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy());
- case LibFunc::dunder_strtok_r:
+ case LibFunc_dunder_strtok_r:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::under_IO_putc:
+ case LibFunc_under_IO_putc:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::dunder_isoc99_scanf:
+ case LibFunc_dunder_isoc99_scanf:
return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
+ case LibFunc_stat64:
+ case LibFunc_lstat64:
+ case LibFunc_statvfs64:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::dunder_isoc99_sscanf:
+ case LibFunc_dunder_isoc99_sscanf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fopen64:
+ case LibFunc_fopen64:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::tmpfile64:
+ case LibFunc_tmpfile64:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
+ case LibFunc_fstat64:
+ case LibFunc_fstatvfs64:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::open64:
+ case LibFunc_open64:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::gettimeofday:
+ case LibFunc_gettimeofday:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::Znwj: // new(unsigned int);
- case LibFunc::Znwm: // new(unsigned long);
- case LibFunc::Znaj: // new[](unsigned int);
- case LibFunc::Znam: // new[](unsigned long);
- case LibFunc::msvc_new_int: // new(unsigned int);
- case LibFunc::msvc_new_longlong: // new(unsigned long long);
- case LibFunc::msvc_new_array_int: // new[](unsigned int);
- case LibFunc::msvc_new_array_longlong: // new[](unsigned long long);
- return (NumParams == 1);
-
- case LibFunc::memset_pattern16:
+ // new(unsigned int);
+ case LibFunc_Znwj:
+ // new(unsigned long);
+ case LibFunc_Znwm:
+ // new[](unsigned int);
+ case LibFunc_Znaj:
+ // new[](unsigned long);
+ case LibFunc_Znam:
+ // new(unsigned int);
+ case LibFunc_msvc_new_int:
+ // new(unsigned long long);
+ case LibFunc_msvc_new_longlong:
+ // new[](unsigned int);
+ case LibFunc_msvc_new_array_int:
+ // new[](unsigned long long);
+ case LibFunc_msvc_new_array_longlong:
+ return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
+
+ // new(unsigned int, nothrow);
+ case LibFunc_ZnwjRKSt9nothrow_t:
+ // new(unsigned long, nothrow);
+ case LibFunc_ZnwmRKSt9nothrow_t:
+ // new[](unsigned int, nothrow);
+ case LibFunc_ZnajRKSt9nothrow_t:
+ // new[](unsigned long, nothrow);
+ case LibFunc_ZnamRKSt9nothrow_t:
+ // new(unsigned int, nothrow);
+ case LibFunc_msvc_new_int_nothrow:
+ // new(unsigned long long, nothrow);
+ case LibFunc_msvc_new_longlong_nothrow:
+ // new[](unsigned int, nothrow);
+ case LibFunc_msvc_new_array_int_nothrow:
+ // new[](unsigned long long, nothrow);
+ case LibFunc_msvc_new_array_longlong_nothrow:
+ return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
+
+ // void operator delete[](void*);
+ case LibFunc_ZdaPv:
+ // void operator delete(void*);
+ case LibFunc_ZdlPv:
+ // void operator delete[](void*);
+ case LibFunc_msvc_delete_array_ptr32:
+ // void operator delete[](void*);
+ case LibFunc_msvc_delete_array_ptr64:
+ // void operator delete(void*);
+ case LibFunc_msvc_delete_ptr32:
+ // void operator delete(void*);
+ case LibFunc_msvc_delete_ptr64:
+ return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
+
+ // void operator delete[](void*, nothrow);
+ case LibFunc_ZdaPvRKSt9nothrow_t:
+ // void operator delete[](void*, unsigned int);
+ case LibFunc_ZdaPvj:
+ // void operator delete[](void*, unsigned long);
+ case LibFunc_ZdaPvm:
+ // void operator delete(void*, nothrow);
+ case LibFunc_ZdlPvRKSt9nothrow_t:
+ // void operator delete(void*, unsigned int);
+ case LibFunc_ZdlPvj:
+ // void operator delete(void*, unsigned long);
+ case LibFunc_ZdlPvm:
+ // void operator delete[](void*, unsigned int);
+ case LibFunc_msvc_delete_array_ptr32_int:
+ // void operator delete[](void*, nothrow);
+ case LibFunc_msvc_delete_array_ptr32_nothrow:
+ // void operator delete[](void*, unsigned long long);
+ case LibFunc_msvc_delete_array_ptr64_longlong:
+ // void operator delete[](void*, nothrow);
+ case LibFunc_msvc_delete_array_ptr64_nothrow:
+ // void operator delete(void*, unsigned int);
+ case LibFunc_msvc_delete_ptr32_int:
+ // void operator delete(void*, nothrow);
+ case LibFunc_msvc_delete_ptr32_nothrow:
+ // void operator delete(void*, unsigned long long);
+ case LibFunc_msvc_delete_ptr64_longlong:
+ // void operator delete(void*, nothrow);
+ case LibFunc_msvc_delete_ptr64_nothrow:
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
+
+ case LibFunc_memset_pattern16:
return (!FTy.isVarArg() && NumParams == 3 &&
- isa<PointerType>(FTy.getParamType(0)) &&
- isa<PointerType>(FTy.getParamType(1)) &&
- isa<IntegerType>(FTy.getParamType(2)));
-
- // int __nvvm_reflect(const char *);
- case LibFunc::nvvm_reflect:
- return (NumParams == 1 && isa<PointerType>(FTy.getParamType(0)));
-
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
- case LibFunc::tan:
- case LibFunc::tanf:
- case LibFunc::tanl:
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::logl:
- case LibFunc::log10:
- case LibFunc::log10f:
- case LibFunc::log10l:
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isIntegerTy());
+
+ case LibFunc_cxa_guard_abort:
+ case LibFunc_cxa_guard_acquire:
+ case LibFunc_cxa_guard_release:
+ case LibFunc_nvvm_reflect:
+ return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
+
+ case LibFunc_sincospi_stret:
+ case LibFunc_sincospif_stret:
+ return (NumParams == 1 && FTy.getParamType(0)->isFloatingPointTy());
+
+ case LibFunc_acos:
+ case LibFunc_acos_finite:
+ case LibFunc_acosf:
+ case LibFunc_acosf_finite:
+ case LibFunc_acosh:
+ case LibFunc_acosh_finite:
+ case LibFunc_acoshf:
+ case LibFunc_acoshf_finite:
+ case LibFunc_acoshl:
+ case LibFunc_acoshl_finite:
+ case LibFunc_acosl:
+ case LibFunc_acosl_finite:
+ case LibFunc_asin:
+ case LibFunc_asin_finite:
+ case LibFunc_asinf:
+ case LibFunc_asinf_finite:
+ case LibFunc_asinh:
+ case LibFunc_asinhf:
+ case LibFunc_asinhl:
+ case LibFunc_asinl:
+ case LibFunc_asinl_finite:
+ case LibFunc_atan:
+ case LibFunc_atanf:
+ case LibFunc_atanh:
+ case LibFunc_atanh_finite:
+ case LibFunc_atanhf:
+ case LibFunc_atanhf_finite:
+ case LibFunc_atanhl:
+ case LibFunc_atanhl_finite:
+ case LibFunc_atanl:
+ case LibFunc_cbrt:
+ case LibFunc_cbrtf:
+ case LibFunc_cbrtl:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosh:
+ case LibFunc_cosh_finite:
+ case LibFunc_coshf:
+ case LibFunc_coshf_finite:
+ case LibFunc_coshl:
+ case LibFunc_coshl_finite:
+ case LibFunc_cosl:
+ case LibFunc_exp10:
+ case LibFunc_exp10_finite:
+ case LibFunc_exp10f:
+ case LibFunc_exp10f_finite:
+ case LibFunc_exp10l:
+ case LibFunc_exp10l_finite:
+ case LibFunc_exp2:
+ case LibFunc_exp2_finite:
+ case LibFunc_exp2f:
+ case LibFunc_exp2f_finite:
+ case LibFunc_exp2l:
+ case LibFunc_exp2l_finite:
+ case LibFunc_exp:
+ case LibFunc_exp_finite:
+ case LibFunc_expf:
+ case LibFunc_expf_finite:
+ case LibFunc_expl:
+ case LibFunc_expl_finite:
+ case LibFunc_expm1:
+ case LibFunc_expm1f:
+ case LibFunc_expm1l:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ case LibFunc_log10:
+ case LibFunc_log10_finite:
+ case LibFunc_log10f:
+ case LibFunc_log10f_finite:
+ case LibFunc_log10l:
+ case LibFunc_log10l_finite:
+ case LibFunc_log1p:
+ case LibFunc_log1pf:
+ case LibFunc_log1pl:
+ case LibFunc_log2:
+ case LibFunc_log2_finite:
+ case LibFunc_log2f:
+ case LibFunc_log2f_finite:
+ case LibFunc_log2l:
+ case LibFunc_log2l_finite:
+ case LibFunc_log:
+ case LibFunc_log_finite:
+ case LibFunc_logb:
+ case LibFunc_logbf:
+ case LibFunc_logbl:
+ case LibFunc_logf:
+ case LibFunc_logf_finite:
+ case LibFunc_logl:
+ case LibFunc_logl_finite:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinh:
+ case LibFunc_sinh_finite:
+ case LibFunc_sinhf:
+ case LibFunc_sinhf_finite:
+ case LibFunc_sinhl:
+ case LibFunc_sinhl_finite:
+ case LibFunc_sinl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrtl_finite:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ case LibFunc_tanhl:
+ case LibFunc_tanl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
return (NumParams == 1 && FTy.getReturnType()->isFloatingPointTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- case LibFunc::pow:
- case LibFunc::powf:
- case LibFunc::powl:
+ case LibFunc_atan2:
+ case LibFunc_atan2_finite:
+ case LibFunc_atan2f:
+ case LibFunc_atan2f_finite:
+ case LibFunc_atan2l:
+ case LibFunc_atan2l_finite:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ case LibFunc_pow:
+ case LibFunc_pow_finite:
+ case LibFunc_powf:
+ case LibFunc_powf_finite:
+ case LibFunc_powl:
+ case LibFunc_powl_finite:
return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getReturnType() == FTy.getParamType(1));
- case LibFunc::ffs:
- case LibFunc::ffsl:
- case LibFunc::ffsll:
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() &&
+ FTy.getReturnType() == FTy.getParamType(0) &&
+ FTy.getParamType(1)->isIntegerTy(32));
+
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isIntegerTy());
- case LibFunc::isdigit:
- case LibFunc::isascii:
- case LibFunc::toascii:
+ case LibFunc_isdigit:
+ case LibFunc_isascii:
+ case LibFunc_toascii:
+ case LibFunc_putchar:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
+ case LibFunc_abs:
+ case LibFunc_labs:
+ case LibFunc_llabs:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::cxa_atexit:
+ case LibFunc_cxa_atexit:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::sinpi:
- case LibFunc::cospi:
+ case LibFunc_sinpi:
+ case LibFunc_cospi:
return (NumParams == 1 && FTy.getReturnType()->isDoubleTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::sinpif:
- case LibFunc::cospif:
+ case LibFunc_sinpif:
+ case LibFunc_cospif:
return (NumParams == 1 && FTy.getReturnType()->isFloatTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- default:
- // Assume the other functions are correct.
- // FIXME: It'd be really nice to cover them all.
- return true;
+ case LibFunc_strnlen:
+ return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(1) &&
+ FTy.getParamType(0) == PCharTy &&
+ FTy.getParamType(1) == SizeTTy);
+
+ case LibFunc_posix_memalign:
+ return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1) == SizeTTy && FTy.getParamType(2) == SizeTTy);
+
+ case LibFunc_wcslen:
+ return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy());
+
+ case LibFunc::NumLibFuncs:
+ break;
}
+
+ llvm_unreachable("Invalid libfunc");
}
bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
- LibFunc::Func &F) const {
+ LibFunc &F) const {
const DataLayout *DL =
FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
return getLibFunc(FDecl.getName(), F) &&
@@ -1134,6 +1381,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"powf", "__svml_powf8", 8},
{"powf", "__svml_powf16", 16},
+ { "__pow_finite", "__svml_pow2", 2 },
+ { "__pow_finite", "__svml_pow4", 4 },
+ { "__pow_finite", "__svml_pow8", 8 },
+
+ { "__powf_finite", "__svml_powf4", 4 },
+ { "__powf_finite", "__svml_powf8", 8 },
+ { "__powf_finite", "__svml_powf16", 16 },
+
{"llvm.pow.f64", "__svml_pow2", 2},
{"llvm.pow.f64", "__svml_pow4", 4},
{"llvm.pow.f64", "__svml_pow8", 8},
@@ -1150,6 +1405,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"expf", "__svml_expf8", 8},
{"expf", "__svml_expf16", 16},
+ { "__exp_finite", "__svml_exp2", 2 },
+ { "__exp_finite", "__svml_exp4", 4 },
+ { "__exp_finite", "__svml_exp8", 8 },
+
+ { "__expf_finite", "__svml_expf4", 4 },
+ { "__expf_finite", "__svml_expf8", 8 },
+ { "__expf_finite", "__svml_expf16", 16 },
+
{"llvm.exp.f64", "__svml_exp2", 2},
{"llvm.exp.f64", "__svml_exp4", 4},
{"llvm.exp.f64", "__svml_exp8", 8},
@@ -1166,6 +1429,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"logf", "__svml_logf8", 8},
{"logf", "__svml_logf16", 16},
+ { "__log_finite", "__svml_log2", 2 },
+ { "__log_finite", "__svml_log4", 4 },
+ { "__log_finite", "__svml_log8", 8 },
+
+ { "__logf_finite", "__svml_logf4", 4 },
+ { "__logf_finite", "__svml_logf8", 8 },
+ { "__logf_finite", "__svml_logf16", 16 },
+
{"llvm.log.f64", "__svml_log2", 2},
{"llvm.log.f64", "__svml_log4", 4},
{"llvm.log.f64", "__svml_log8", 8},
@@ -1248,6 +1519,21 @@ TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) {
return *Impl;
}
+unsigned TargetLibraryInfoImpl::getTargetWCharSize(const Triple &T) {
+ // See also clang/lib/Basic/Targets.cpp.
+ if (T.isPS4() || T.isOSWindows() || T.isArch16Bit())
+ return 2;
+ if (T.getArch() == Triple::xcore)
+ return 1;
+ return 4;
+}
+
+unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const {
+ if (auto *ShortWChar = cast_or_null<ConstantAsMetadata>(
+ M.getModuleFlag("wchar_size")))
+ return cast<ConstantInt>(ShortWChar->getValue())->getZExtValue();
+ return getTargetWCharSize(Triple(M.getTargetTriple()));
+}
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
: ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 5c0d1aa..25813c6 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>
@@ -23,6 +24,11 @@ using namespace llvm;
#define DEBUG_TYPE "tti"
+static cl::opt<bool> UseWideMemcpyLoopLowering(
+ "use-wide-memcpy-loop-lowering", cl::init(false),
+ cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
+ cl::Hidden);
+
namespace {
/// \brief No-op implementation of the TTI interface using the utility base
/// classes.
@@ -76,6 +82,11 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}
+int TargetTransformInfo::getExtCost(const Instruction *I,
+ const Value *Src) const {
+ return TTIImpl->getExtCost(I, Src);
+}
+
int TargetTransformInfo::getIntrinsicCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
@@ -83,8 +94,15 @@ int TargetTransformInfo::getIntrinsicCost(
return Cost;
}
-int TargetTransformInfo::getUserCost(const User *U) const {
- int Cost = TTIImpl->getUserCost(U);
+unsigned
+TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+ unsigned &JTSize) const {
+ return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
+}
+
+int TargetTransformInfo::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) const {
+ int Cost = TTIImpl->getUserCost(U, Operands);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -97,13 +115,21 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
return TTIImpl->isSourceOfDivergence(V);
}
+bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
+ return TTIImpl->isAlwaysUniform(V);
+}
+
+unsigned TargetTransformInfo::getFlatAddressSpace() const {
+ return TTIImpl->getFlatAddressSpace();
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
void TargetTransformInfo::getUnrollingPreferences(
- Loop *L, UnrollingPreferences &UP) const {
- return TTIImpl->getUnrollingPreferences(L, UP);
+ Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
@@ -123,6 +149,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
Scale, AddrSpace);
}
+bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+ return TTIImpl->isLSRCostLess(C1, C2);
+}
+
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
return TTIImpl->isLegalMaskedStore(DataType);
}
@@ -139,6 +169,10 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
return TTIImpl->isLegalMaskedGather(DataType);
}
+bool TargetTransformInfo::prefersVectorizedAddressing() const {
+ return TTIImpl->prefersVectorizedAddressing();
+}
+
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
@@ -182,10 +216,29 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
+unsigned TargetTransformInfo::
+getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
+ return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned TargetTransformInfo::
+getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) const {
+ return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+}
+
+bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
+ return TTIImpl->supportsEfficientVectorElementLoadStore();
+}
+
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
+bool TargetTransformInfo::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const {
+ return TTIImpl->expandMemCmp(I, MaxLoadSize);
+}
+
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
@@ -254,6 +307,16 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
+unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
+ return TTIImpl->getMinVectorRegisterBitWidth();
+}
+
+bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
+ return TTIImpl->shouldConsiderAddressTypePromotion(
+ I, AllowPromotionWithoutCommonHeader);
+}
+
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
@@ -293,8 +356,10 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
}
int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+ Type *Src, const Instruction *I) const {
+ assert ((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -314,8 +379,10 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
}
int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ Type *CondTy, const Instruction *I) const {
+ assert ((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -329,8 +396,11 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
- unsigned AddressSpace) const {
- int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned AddressSpace,
+ const Instruction *I) const {
+ assert ((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -363,17 +433,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -414,11 +484,34 @@ bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}
+unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
+ return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
+}
+
Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
IntrinsicInst *Inst, Type *ExpectedType) const {
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
+Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
+ Value *Length,
+ unsigned SrcAlign,
+ unsigned DestAlign) const {
+ return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
+ DestAlign);
+}
+
+void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
+ TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
+ SrcAlign, DestAlign);
+}
+
+bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const {
+ return UseWideMemcpyLoopLowering;
+}
+
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
return TTIImpl->areInlineCompatible(Caller, Callee);
@@ -462,6 +555,15 @@ unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
+bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
+ Type *Ty, ReductionFlags Flags) const {
+ return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
+}
+
+bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
+ return TTIImpl->shouldExpandReduction(II);
+}
+
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e920c4c..86c528d 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -23,10 +23,10 @@
//
// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
-// !0 = metadata !{ metadata !"an example type tree" }
-// !1 = metadata !{ metadata !"int", metadata !0 }
-// !2 = metadata !{ metadata !"float", metadata !0 }
-// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+// !0 = !{ !"an example type tree" }
+// !1 = !{ !"int", !0 }
+// !2 = !{ !"float", !0 }
+// !3 = !{ !"const float", !2, i64 1 }
//
// The first field is an identity field. It can be any value, usually
// an MDString, which uniquely identifies the type. The most important
@@ -58,7 +58,7 @@
//
// The struct type node has a name and a list of pairs, one pair for each member
// of the struct. The first element of each pair is a type node (a struct type
-// node or a sclar type node), specifying the type of the member, the second
+// node or a scalar type node), specifying the type of the member, the second
// element of each pair is the offset of the member.
//
// Given an example
@@ -74,13 +74,13 @@
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
-// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
-// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
-// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
-// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
-// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// !0 = !{!"Simple C/C++ TBAA"}
+// !1 = !{!"omnipotent char", !0} // Scalar type node
+// !2 = !{!"short", !1} // Scalar type node
+// !3 = !{!"A", !2, i64 0} // Struct type node
+// !4 = !{!"B", !2, i64 0, !3, i64 4}
// // Struct type node
-// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+// !5 = !{!4, !2, i64 4} // Path tag node
//
// The struct type nodes and the scalar type nodes form a type DAG.
// Root (!0)
diff --git a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
index f567541..6871e48 100644
--- a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -39,7 +39,7 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
// Search for virtual calls that load from VPtr and add them to DevirtCalls.
static void
-findLoadCallsAtConstantOffset(Module *M,
+findLoadCallsAtConstantOffset(const Module *M,
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
Value *VPtr, int64_t Offset) {
for (const Use &U : VPtr->uses()) {
@@ -62,10 +62,10 @@ findLoadCallsAtConstantOffset(Module *M,
void llvm::findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, CallInst *CI) {
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI) {
assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
- Module *M = CI->getParent()->getParent()->getParent();
+ const Module *M = CI->getParent()->getParent()->getParent();
// Find llvm.assume intrinsics for this llvm.type.test call.
for (const Use &CIU : CI->uses()) {
@@ -86,7 +86,8 @@ void llvm::findDevirtualizableCallsForTypeTest(
void llvm::findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
- SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI) {
+ SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
+ const CallInst *CI) {
assert(CI->getCalledFunction()->getIntrinsicID() ==
Intrinsic::type_checked_load);
@@ -96,7 +97,7 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
return;
}
- for (Use &U : CI->uses()) {
+ for (const Use &U : CI->uses()) {
auto CIU = U.getUser();
if (auto EVI = dyn_cast<ExtractValueInst>(CIU)) {
if (EVI->getNumIndices() == 1 && EVI->getIndices()[0] == 0) {
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index be62858..cdfe74d 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -17,14 +17,16 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
@@ -37,6 +39,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <array>
@@ -57,8 +60,8 @@ static cl::opt<bool>
DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits",
cl::Hidden, cl::init(true));
-/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
-/// 0). For vector types, returns the element type's bitwidth.
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
@@ -76,6 +79,9 @@ struct Query {
AssumptionCache *AC;
const Instruction *CxtI;
const DominatorTree *DT;
+ // Unlike the other analyses, this may be a nullptr because not all clients
+ // provide it currently.
+ OptimizationRemarkEmitter *ORE;
/// Set of assumptions that should be excluded from further queries.
/// This is because of the potential for mutual recursion to cause
@@ -83,18 +89,18 @@ struct Query {
/// classic case of this is assume(x = y), which will attempt to determine
/// bits in x from bits in y, which will attempt to determine bits in y from
/// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
- /// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
- /// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so
- /// on.
+ /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
+ /// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
unsigned NumExcluded;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), NumExcluded(0) {}
+ const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
Query(const Query &Q, const Value *NewExcl)
- : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), NumExcluded(Q.NumExcluded) {
+ : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
+ NumExcluded(Q.NumExcluded) {
Excluded = Q.Excluded;
Excluded[NumExcluded++] = NewExcl;
assert(NumExcluded <= Excluded.size());
@@ -125,15 +131,28 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
return nullptr;
}
-static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
+static void computeKnownBits(const Value *V, KnownBits &Known,
unsigned Depth, const Query &Q);
-void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
+void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- ::computeKnownBits(V, KnownZero, KnownOne, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE) {
+ ::computeKnownBits(V, Known, Depth,
+ Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+}
+
+static KnownBits computeKnownBits(const Value *V, unsigned Depth,
+ const Query &Q);
+
+KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE) {
+ return ::computeKnownBits(V, Depth,
+ Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
@@ -145,22 +164,24 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
assert(LHS->getType()->isIntOrIntVectorTy() &&
"LHS and RHS should be integers");
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
- APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0);
- APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT);
- return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+ KnownBits LHSKnown(IT->getBitWidth());
+ KnownBits RHSKnown(IT->getBitWidth());
+ computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
+ return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
-static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
- unsigned Depth, const Query &Q);
-void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- ::ComputeSignBit(V, KnownZero, KnownOne, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
+ for (const User *U : CxtI->users()) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ return false;
+ }
+ return true;
}
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
@@ -187,9 +208,8 @@ bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- bool NonNegative, Negative;
- ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
- return NonNegative;
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ return Known.isNonNegative();
}
bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
@@ -207,9 +227,8 @@ bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- bool NonNegative, Negative;
- ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
- return Negative;
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ return Known.isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
@@ -246,91 +265,65 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
bool NSW,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
+ KnownBits &KnownOut, KnownBits &Known2,
unsigned Depth, const Query &Q) {
- if (!Add) {
- if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned BitWidth = KnownZero.getBitWidth();
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
- }
- }
- }
- }
-
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = KnownOut.getBitWidth();
// If an initial sequence of bits in the result is not needed, the
// corresponding bits in the operands are not needed.
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, Depth + 1, Q);
- computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q);
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
+ computeKnownBits(Op1, Known2, Depth + 1, Q);
// Carry in a 1 for a subtract, rather than a 0.
- APInt CarryIn(BitWidth, 0);
+ uint64_t CarryIn = 0;
if (!Add) {
// Sum = LHS + ~RHS + 1
- std::swap(KnownZero2, KnownOne2);
- CarryIn.setBit(0);
+ std::swap(Known2.Zero, Known2.One);
+ CarryIn = 1;
}
- APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn;
- APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn;
+ APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn;
+ APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn;
// Compute known bits of the carry.
- APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2);
- APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2;
+ APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero);
+ APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One;
// Compute set of known bits (where all three relevant bits are known).
- APInt LHSKnown = LHSKnownZero | LHSKnownOne;
- APInt RHSKnown = KnownZero2 | KnownOne2;
- APInt CarryKnown = CarryKnownZero | CarryKnownOne;
- APInt Known = LHSKnown & RHSKnown & CarryKnown;
+ APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One;
+ APInt RHSKnownUnion = Known2.Zero | Known2.One;
+ APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne;
+ APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion;
assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
"known bits of sum differ");
// Compute known bits of the result.
- KnownZero = ~PossibleSumOne & Known;
- KnownOne = PossibleSumOne & Known;
+ KnownOut.Zero = ~PossibleSumOne & Known;
+ KnownOut.One = PossibleSumOne & Known;
// Are we still trying to solve for the sign bit?
- if (!Known.isNegative()) {
+ if (!Known.isSignBitSet()) {
if (NSW) {
// Adding two non-negative numbers, or subtracting a negative number from
// a non-negative one, can't wrap into negative.
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
+ if (LHSKnown.isNonNegative() && Known2.isNonNegative())
+ KnownOut.makeNonNegative();
// Adding two negative numbers, or subtracting a non-negative number from
// a negative one, can't wrap into non-negative.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
+ else if (LHSKnown.isNegative() && Known2.isNegative())
+ KnownOut.makeNegative();
}
}
}
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
+ KnownBits &Known, KnownBits &Known2,
unsigned Depth, const Query &Q) {
- unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(Op0, KnownZero2, KnownOne2, Depth + 1, Q);
+ unsigned BitWidth = Known.getBitWidth();
+ computeKnownBits(Op1, Known, Depth + 1, Q);
+ computeKnownBits(Op0, Known2, Depth + 1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -340,10 +333,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// The product of a number with itself is non-negative.
isKnownNonNegative = true;
} else {
- bool isKnownNonNegativeOp1 = KnownZero.isNegative();
- bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
- bool isKnownNegativeOp1 = KnownOne.isNegative();
- bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ bool isKnownNonNegativeOp1 = Known.isNonNegative();
+ bool isKnownNonNegativeOp0 = Known2.isNonNegative();
+ bool isKnownNegativeOp1 = Known.isNegative();
+ bool isKnownNegativeOp0 = Known2.isNegative();
// The product of two numbers with the same sign is non-negative.
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
(isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
@@ -361,38 +354,37 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
+ unsigned TrailZ = Known.countMinTrailingZeros() +
+ Known2.countMinTrailingZeros();
+ unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
+ Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
TrailZ = std::min(TrailZ, BitWidth);
LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
+ Known.resetAll();
+ Known.Zero.setLowBits(TrailZ);
+ Known.Zero.setHighBits(LeadZ);
// Only make use of no-wrap flags if we failed to compute the sign bit
// directly. This matters if the multiplication always overflows, in
// which case we prefer to follow the result of the direct computation,
// though as the program is invoking undefined behaviour we can choose
// whatever we like here.
- if (isKnownNonNegative && !KnownOne.isNegative())
- KnownZero.setBit(BitWidth - 1);
- else if (isKnownNegative && !KnownZero.isNegative())
- KnownOne.setBit(BitWidth - 1);
+ if (isKnownNonNegative && !Known.isNegative())
+ Known.makeNonNegative();
+ else if (isKnownNegative && !Known.isNonNegative())
+ Known.makeNegative();
}
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
- APInt &KnownZero,
- APInt &KnownOne) {
- unsigned BitWidth = KnownZero.getBitWidth();
+ KnownBits &Known) {
+ unsigned BitWidth = Known.getBitWidth();
unsigned NumRanges = Ranges.getNumOperands() / 2;
assert(NumRanges >= 1);
- KnownZero.setAllBits();
- KnownOne.setAllBits();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Lower =
@@ -406,8 +398,8 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
(Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
- KnownOne &= Range.getUnsignedMax() & Mask;
- KnownZero &= ~Range.getUnsignedMax() & Mask;
+ Known.One &= Range.getUnsignedMax() & Mask;
+ Known.Zero &= ~Range.getUnsignedMax() & Mask;
}
}
@@ -516,15 +508,14 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return !isEphemeralValueOf(Inv, CxtI);
}
-static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth,
- const Query &Q) {
+static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
+ unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
if (!Q.AC || !Q.CxtI)
return;
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
// Note that the patterns below need to be kept in sync with the code
// in AssumptionCache::updateAffectedValues.
@@ -549,8 +540,13 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
assert(BitWidth == 1 && "assume operand is not i1?");
- KnownZero.clearAllBits();
- KnownOne.setAllBits();
+ Known.setAllOnes();
+ return;
+ }
+ if (match(Arg, m_Not(m_Specific(V))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ assert(BitWidth == 1 && "assume operand is not i1?");
+ Known.setAllZero();
return;
}
@@ -568,282 +564,284 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
// assume(v = a)
if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- KnownZero |= RHSKnownZero;
- KnownOne |= RHSKnownOne;
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ Known.Zero |= RHSKnown.Zero;
+ Known.One |= RHSKnown.One;
// assume(v & b = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
- computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
// For those bits in the mask that are known to be one, we can propagate
// known bits from the RHS to V.
- KnownZero |= RHSKnownZero & MaskKnownOne;
- KnownOne |= RHSKnownOne & MaskKnownOne;
+ Known.Zero |= RHSKnown.Zero & MaskKnown.One;
+ Known.One |= RHSKnown.One & MaskKnown.One;
// assume(~(v & b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
- computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
// For those bits in the mask that are known to be one, we can propagate
// inverted known bits from the RHS to V.
- KnownZero |= RHSKnownOne & MaskKnownOne;
- KnownOne |= RHSKnownZero & MaskKnownOne;
+ Known.Zero |= RHSKnown.One & MaskKnown.One;
+ Known.One |= RHSKnown.Zero & MaskKnown.One;
// assume(v | b = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate known
// bits from the RHS to V.
- KnownZero |= RHSKnownZero & BKnownZero;
- KnownOne |= RHSKnownOne & BKnownZero;
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
// assume(~(v | b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate
// inverted known bits from the RHS to V.
- KnownZero |= RHSKnownOne & BKnownZero;
- KnownOne |= RHSKnownZero & BKnownZero;
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
// assume(v ^ b = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate known
// bits from the RHS to V. For those bits in B that are known to be one,
// we can propagate inverted known bits from the RHS to V.
- KnownZero |= RHSKnownZero & BKnownZero;
- KnownOne |= RHSKnownOne & BKnownZero;
- KnownZero |= RHSKnownOne & BKnownOne;
- KnownOne |= RHSKnownZero & BKnownOne;
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ Known.Zero |= RHSKnown.One & BKnown.One;
+ Known.One |= RHSKnown.Zero & BKnown.One;
// assume(~(v ^ b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate
// inverted known bits from the RHS to V. For those bits in B that are
// known to be one, we can propagate known bits from the RHS to V.
- KnownZero |= RHSKnownOne & BKnownZero;
- KnownOne |= RHSKnownZero & BKnownZero;
- KnownZero |= RHSKnownZero & BKnownOne;
- KnownOne |= RHSKnownOne & BKnownOne;
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ Known.Zero |= RHSKnown.Zero & BKnown.One;
+ Known.One |= RHSKnown.One & BKnown.One;
// assume(v << c = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
- KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
+ RHSKnown.Zero.lshrInPlace(C->getZExtValue());
+ Known.Zero |= RHSKnown.Zero;
+ RHSKnown.One.lshrInPlace(C->getZExtValue());
+ Known.One |= RHSKnown.One;
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
- KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
+ RHSKnown.One.lshrInPlace(C->getZExtValue());
+ Known.Zero |= RHSKnown.One;
+ RHSKnown.Zero.lshrInPlace(C->getZExtValue());
+ Known.One |= RHSKnown.Zero;
// assume(v >> c = a)
} else if (match(Arg,
- m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C))),
+ m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- KnownZero |= RHSKnownZero << C->getZExtValue();
- KnownOne |= RHSKnownOne << C->getZExtValue();
+ Known.Zero |= RHSKnown.Zero << C->getZExtValue();
+ Known.One |= RHSKnown.One << C->getZExtValue();
// assume(~(v >> c) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
- m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C)))),
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- KnownZero |= RHSKnownOne << C->getZExtValue();
- KnownOne |= RHSKnownZero << C->getZExtValue();
+ Known.Zero |= RHSKnown.One << C->getZExtValue();
+ Known.One |= RHSKnown.Zero << C->getZExtValue();
// assume(v >=_s c) where c is non-negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGE &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownZero.isNegative()) {
+ if (RHSKnown.isNonNegative()) {
// We know that the sign bit is zero.
- KnownZero |= APInt::getSignBit(BitWidth);
+ Known.makeNonNegative();
}
// assume(v >_s c) where c is at least -1.
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
+ if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
// We know that the sign bit is zero.
- KnownZero |= APInt::getSignBit(BitWidth);
+ Known.makeNonNegative();
}
// assume(v <=_s c) where c is negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SLE &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownOne.isNegative()) {
+ if (RHSKnown.isNegative()) {
// We know that the sign bit is one.
- KnownOne |= APInt::getSignBit(BitWidth);
+ Known.makeNegative();
}
// assume(v <_s c) where c is non-positive
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SLT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
+ if (RHSKnown.isZero() || RHSKnown.isNegative()) {
// We know that the sign bit is one.
- KnownOne |= APInt::getSignBit(BitWidth);
+ Known.makeNegative();
}
// assume(v <=_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULE &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero.
- KnownZero |=
- APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
- // assume(v <_u c)
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+ // assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
- KnownZero |=
- APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
else
- KnownZero |=
- APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
}
}
// If assumptions conflict with each other or previous known bits, then we
- // have a logical fallacy. This should only happen when a program has
- // undefined behavior. We can't assert/crash, so clear out the known bits and
- // hope for the best.
-
- // FIXME: Publish a warning/remark that we have encountered UB or the compiler
- // is broken.
-
- // FIXME: Implement a stronger version of "I give up" by invalidating/clearing
- // the assumption cache. This should indicate that the cache is corrupted so
- // future callers will not waste time repopulating it with faulty assumptions.
-
- if ((KnownZero & KnownOne) != 0) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ // have a logical fallacy. It's possible that the assumption is not reachable,
+ // so this isn't a real bug. On the other hand, the program may have undefined
+ // behavior, or we might have a bug in the compiler. We can't assert/crash, so
+ // clear out the known bits, try to warn the user, and hope for the best.
+ if (Known.Zero.intersects(Known.One)) {
+ Known.resetAll();
+
+ if (Q.ORE) {
+ auto *CxtI = const_cast<Instruction *>(Q.CxtI);
+ OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI);
+ Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may "
+ "have undefined behavior, or compiler may have "
+ "internal error.");
+ }
}
}
// Compute known bits from a shift operator, including those with a
-// non-constant shift amount. KnownZero and KnownOne are the outputs of this
-// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
-// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
-// functors that, given the known-zero or known-one bits respectively, and a
-// shift amount, compute the implied known-zero or known-one bits of the shift
-// operator's result respectively for that shift amount. The results from calling
-// KZF and KOF are conservatively combined for all permitted shift amounts.
+// non-constant shift amount. Known is the output of this function. Known2 is a
+// pre-allocated temporary with the same bit width as Known. KZF and KOF are
+// operator-specific functors that, given the known-zero or known-one bits
+// respectively, and a shift amount, compute the implied known-zero or known-one
+// bits of the shift operator's result respectively for that shift amount. The
+// results from calling KZF and KOF are conservatively combined for all
+// permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
- const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2,
- APInt &KnownOne2, unsigned Depth, const Query &Q,
+ const Operator *I, KnownBits &Known, KnownBits &Known2,
+ unsigned Depth, const Query &Q,
function_ref<APInt(const APInt &, unsigned)> KZF,
function_ref<APInt(const APInt &, unsigned)> KOF) {
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero = KZF(KnownZero, ShiftAmt);
- KnownOne = KOF(KnownOne, ShiftAmt);
- // If there is conflict between KnownZero and KnownOne, this must be an
- // overflowing left shift, so the shift result is undefined. Clear KnownZero
- // and KnownOne bits so that other code could propagate this undef.
- if ((KnownZero & KnownOne) != 0) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
- }
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ Known.Zero = KZF(Known.Zero, ShiftAmt);
+ Known.One = KOF(Known.One, ShiftAmt);
+ // If there is conflict between Known.Zero and Known.One, this must be an
+ // overflowing left shift, so the shift result is undefined. Clear Known
+ // bits so that other code could propagate this undef.
+ if ((Known.Zero & Known.One) != 0)
+ Known.resetAll();
return;
}
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
- // Note: We cannot use KnownZero.getLimitedValue() here, because if
+ // If the shift amount could be greater than or equal to the bit-width of the LHS, the
+ // value could be undef, so we don't know anything about it.
+ if ((~Known.Zero).uge(BitWidth)) {
+ Known.resetAll();
+ return;
+ }
+
+ // Note: We cannot use Known.Zero.getLimitedValue() here, because if
// BitWidth > 64 and any upper bits are known, we'll end up returning the
// limit value (which implies all bits are known).
- uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
- uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
// It would be more-clearly correct to use the two temporaries for this
// calculation. Reusing the APInts here to prevent unnecessary allocations.
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ Known.resetAll();
// If we know the shifter operand is nonzero, we can sometimes infer more
// known bits. However this is expensive to compute, so be lazy about it and
@@ -851,16 +849,18 @@ static void computeKnownBitsFromShiftOperator(
Optional<bool> ShifterOperandIsNonZero;
// Early exit if we can't constrain any well-defined shift amount.
- if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+ if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
+ !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
ShifterOperandIsNonZero =
isKnownNonZero(I->getOperand(1), Depth + 1, Q);
if (!*ShifterOperandIsNonZero)
return;
}
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
// Combine the shifted known input bits only for those shift amounts
// compatible with its known constraints.
@@ -879,8 +879,8 @@ static void computeKnownBitsFromShiftOperator(
continue;
}
- KnownZero &= KZF(KnownZero2, ShiftAmt);
- KnownOne &= KOF(KnownOne2, ShiftAmt);
+ Known.Zero &= KZF(Known2.Zero, ShiftAmt);
+ Known.One &= KOF(Known2.One, ShiftAmt);
}
// If there are no compatible shift amounts, then we've proven that the shift
@@ -888,33 +888,30 @@ static void computeKnownBitsFromShiftOperator(
// return anything we'd like, but we need to make sure the sets of known bits
// stay disjoint (it should be better for some other code to actually
// propagate the undef than to pick a value here using known bits).
- if ((KnownZero & KnownOne) != 0) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
- }
+ if (Known.Zero.intersects(Known.One))
+ Known.resetAll();
}
-static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth,
- const Query &Q) {
- unsigned BitWidth = KnownZero.getBitWidth();
+static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
+ unsigned Depth, const Query &Q) {
+ unsigned BitWidth = Known.getBitWidth();
- APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ KnownBits Known2(Known);
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
+ computeKnownBitsFromRangeMetadata(*MD, Known);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
+ Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
+ Known.Zero |= Known2.Zero;
// and(x, add (x, -1)) is a common idiom that always clears the low bit;
// here we handle the more general case of adding any odd number by
@@ -922,118 +919,113 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// TODO: This could be generalized to clearing any bit set in y where the
// following bit is known to be unset in y.
Value *Y = nullptr;
- if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
- m_Value(Y))) ||
- match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
- m_Value(Y)))) {
- APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0);
- computeKnownBits(Y, KnownZero3, KnownOne3, Depth + 1, Q);
- if (KnownOne3.countTrailingOnes() > 0)
- KnownZero |= APInt::getLowBitsSet(BitWidth, 1);
+ if (!Known.Zero[0] && !Known.One[0] &&
+ (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
+ m_Value(Y))) ||
+ match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
+ m_Value(Y))))) {
+ Known2.resetAll();
+ computeKnownBits(Y, Known2, Depth + 1, Q);
+ if (Known2.countMinTrailingOnes() > 0)
+ Known.Zero.setBit(0);
}
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
+ Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
+ Known.One |= Known2.One;
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = KnownZeroOut;
+ Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+ Known.Zero = std::move(KnownZeroOut);
break;
}
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero,
- KnownOne, KnownZero2, KnownOne2, Depth, Q);
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
+ Known2, Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
- unsigned LeadZ = KnownZero2.countLeadingOnes();
-
- KnownOne2.clearAllBits();
- KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
- unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
- if (RHSUnknownLeadingOnes != BitWidth)
- LeadZ = std::min(BitWidth,
- LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
-
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ unsigned LeadZ = Known2.countMinLeadingZeros();
+
+ Known2.resetAll();
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
+ if (RHSMaxLeadingZeros != BitWidth)
+ LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
+
+ Known.Zero.setHighBits(LeadZ);
break;
}
case Instruction::Select: {
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
-
- const Value *LHS;
- const Value *RHS;
+ const Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
if (SelectPatternResult::isMinOrMax(SPF)) {
- computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(RHS, Known, Depth + 1, Q);
+ computeKnownBits(LHS, Known2, Depth + 1, Q);
} else {
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
}
unsigned MaxHighOnes = 0;
unsigned MaxHighZeros = 0;
if (SPF == SPF_SMAX) {
// If both sides are negative, the result is negative.
- if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1])
+ if (Known.isNegative() && Known2.isNegative())
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
- std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
// If either side is non-negative, the result is non-negative.
- else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1])
+ else if (Known.isNonNegative() || Known2.isNonNegative())
MaxHighZeros = 1;
} else if (SPF == SPF_SMIN) {
// If both sides are non-negative, the result is non-negative.
- if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1])
+ if (Known.isNonNegative() && Known2.isNonNegative())
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
- MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
+ MaxHighZeros = std::max(Known.countMinLeadingZeros(),
+ Known2.countMinLeadingZeros());
// If either side is negative, the result is negative.
- else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1])
+ else if (Known.isNegative() || Known2.isNegative())
MaxHighOnes = 1;
} else if (SPF == SPF_UMAX) {
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
- std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
} else if (SPF == SPF_UMIN) {
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
MaxHighZeros =
- std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes());
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
}
// Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
if (MaxHighOnes > 0)
- KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes);
+ Known.One.setHighBits(MaxHighOnes);
if (MaxHighZeros > 0)
- KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros);
+ Known.Zero.setHighBits(MaxHighZeros);
break;
}
case Instruction::FPTrunc:
@@ -1057,14 +1049,12 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType());
assert(SrcBitWidth && "SrcBitWidth can't be zero");
- KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
- KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero = KnownZero.zextOrTrunc(BitWidth);
- KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ Known = Known.zextOrTrunc(SrcBitWidth);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ Known = Known.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
if (BitWidth > SrcBitWidth)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
case Instruction::BitCast: {
@@ -1073,7 +1063,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
}
break;
@@ -1082,90 +1072,75 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
- KnownZero = KnownZero.trunc(SrcBitWidth);
- KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
-
+ Known = Known.trunc(SrcBitWidth);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
- if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
- KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ Known = Known.sext(BitWidth);
break;
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) {
- APInt KZResult =
- (KnownZero << ShiftAmt) |
- APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult = KnownZero << ShiftAmt;
+ KZResult.setLowBits(ShiftAmt); // Low bits known 0.
// If this shift has "nsw" keyword, then the result is either a poison
// value or has the same sign bit as the first operand.
- if (NSW && KnownZero.isNegative())
- KZResult.setBit(BitWidth - 1);
+ if (NSW && KnownZero.isSignBitSet())
+ KZResult.setSignBit();
return KZResult;
};
- auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) {
+ auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
APInt KOResult = KnownOne << ShiftAmt;
- if (NSW && KnownOne.isNegative())
- KOResult.setBit(BitWidth - 1);
+ if (NSW && KnownOne.isSignBitSet())
+ KOResult.setSignBit();
return KOResult;
};
- computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
- KnownZero2, KnownOne2, Depth, Q, KZF,
- KOF);
+ computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
break;
}
case Instruction::LShr: {
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return APIntOps::lshr(KnownZero, ShiftAmt) |
- // High bits known zero.
- APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult = KnownZero.lshr(ShiftAmt);
+ // High bits known zero.
+ KZResult.setHighBits(ShiftAmt);
+ return KZResult;
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return APIntOps::lshr(KnownOne, ShiftAmt);
+ auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne.lshr(ShiftAmt);
};
- computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
- KnownZero2, KnownOne2, Depth, Q, KZF,
- KOF);
+ computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
break;
}
case Instruction::AShr: {
// (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return APIntOps::ashr(KnownZero, ShiftAmt);
+ auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
+ return KnownZero.ashr(ShiftAmt);
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return APIntOps::ashr(KnownOne, ShiftAmt);
+ auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne.ashr(ShiftAmt);
};
- computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
- KnownZero2, KnownOne2, Depth, Q, KZF,
- KOF);
+ computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
break;
}
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, Depth,
- Q);
+ Known, Known2, Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, Depth,
- Q);
+ Known, Known2, Depth, Q);
break;
}
case Instruction::SRem:
@@ -1173,37 +1148,33 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1,
- Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// The low bits of the first operand are unchanged by the srem.
- KnownZero = KnownZero2 & LowBits;
- KnownOne = KnownOne2 & LowBits;
+ Known.Zero = Known2.Zero & LowBits;
+ Known.One = Known2.One & LowBits;
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
- KnownZero |= ~LowBits;
+ if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
+ Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
- KnownOne |= ~LowBits;
+ if (Known2.isNegative() && LowBits.intersects(Known2.One))
+ Known.One |= ~LowBits;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ break;
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- if (KnownZero.isNonNegative()) {
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- Q);
- // If it's known zero, our sign bit is also zero.
- if (LHSKnownZero.isNegative())
- KnownZero.setBit(BitWidth - 1);
- }
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ // If it's known zero, our sign bit is also zero.
+ if (Known2.isNonNegative())
+ Known.makeNonNegative();
break;
case Instruction::URem: {
@@ -1211,22 +1182,22 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
const APInt &RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero |= ~LowBits;
- KnownOne &= LowBits;
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ Known.Zero |= ~LowBits;
+ Known.One &= LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
- unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
- KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ unsigned Leaders =
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
+ Known.resetAll();
+ Known.Zero.setHighBits(Leaders);
break;
}
@@ -1237,16 +1208,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
if (Align > 0)
- KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ Known.Zero.setLowBits(countTrailingZeros(Align));
break;
}
case Instruction::GetElementPtr: {
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
- APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, Depth + 1,
- Q);
- unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+ KnownBits LocalKnown(BitWidth);
+ computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
+ unsigned TrailZ = LocalKnown.countMinTrailingZeros();
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
@@ -1276,15 +1246,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
- LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, Depth + 1, Q);
+ LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
+ computeKnownBits(Index, LocalKnown, Depth + 1, Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
- LocalKnownZero.countTrailingOnes()));
+ LocalKnown.countMinTrailingZeros()));
}
}
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
+ Known.Zero.setLowBits(TrailZ);
break;
}
case Instruction::PHI: {
@@ -1319,15 +1289,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(R, Known2, Depth + 1, Q);
// We need to take the minimum number of known bits
- APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q);
+ KnownBits Known3(Known);
+ computeKnownBits(L, Known3, Depth + 1, Q);
- KnownZero = APInt::getLowBitsSet(
- BitWidth, std::min(KnownZero2.countTrailingOnes(),
- KnownZero3.countTrailingOnes()));
+ Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
+ Known3.countMinTrailingZeros()));
if (DontImproveNonNegativePhiBits)
break;
@@ -1344,25 +1313,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// (add non-negative, non-negative) --> non-negative
// (add negative, negative) --> negative
if (Opcode == Instruction::Add) {
- if (KnownZero2.isNegative() && KnownZero3.isNegative())
- KnownZero.setBit(BitWidth - 1);
- else if (KnownOne2.isNegative() && KnownOne3.isNegative())
- KnownOne.setBit(BitWidth - 1);
+ if (Known2.isNonNegative() && Known3.isNonNegative())
+ Known.makeNonNegative();
+ else if (Known2.isNegative() && Known3.isNegative())
+ Known.makeNegative();
}
// (sub nsw non-negative, negative) --> non-negative
// (sub nsw negative, non-negative) --> negative
else if (Opcode == Instruction::Sub && LL == I) {
- if (KnownZero2.isNegative() && KnownOne3.isNegative())
- KnownZero.setBit(BitWidth - 1);
- else if (KnownOne2.isNegative() && KnownZero3.isNegative())
- KnownOne.setBit(BitWidth - 1);
+ if (Known2.isNonNegative() && Known3.isNegative())
+ Known.makeNonNegative();
+ else if (Known2.isNegative() && Known3.isNonNegative())
+ Known.makeNegative();
}
// (mul nsw non-negative, non-negative) --> non-negative
- else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
- KnownZero3.isNegative())
- KnownZero.setBit(BitWidth - 1);
+ else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
+ Known3.isNonNegative())
+ Known.makeNonNegative();
}
break;
@@ -1376,27 +1345,26 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
// Skip if every incoming value references to ourself.
if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
break;
- KnownZero = APInt::getAllOnesValue(BitWidth);
- KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (Value *IncValue : P->incoming_values()) {
// Skip direct self references.
if (IncValue == P) continue;
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
+ Known2 = KnownBits(BitWidth);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(IncValue, KnownZero2, KnownOne2, MaxDepth - 1, Q);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
+ computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
// If all bits have been ruled out, there's no need to check
// more operands.
- if (!KnownZero && !KnownOne)
+ if (!Known.Zero && !Known.One)
break;
}
}
@@ -1408,45 +1376,60 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// and then intersect with known bits based on other properties of the
// function.
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
+ computeKnownBitsFromRangeMetadata(*MD, Known);
if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
- computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q);
- KnownZero |= KnownZero2;
- KnownOne |= KnownOne2;
+ computeKnownBits(RV, Known2, Depth + 1, Q);
+ Known.Zero |= Known2.Zero;
+ Known.One |= Known2.One;
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::bitreverse:
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ Known.Zero |= Known2.Zero.reverseBits();
+ Known.One |= Known2.One.reverseBits();
+ break;
case Intrinsic::bswap:
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
- KnownZero |= KnownZero2.byteSwap();
- KnownOne |= KnownOne2.byteSwap();
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ Known.Zero |= Known2.Zero.byteSwap();
+ Known.One |= Known2.One.byteSwap();
break;
- case Intrinsic::ctlz:
+ case Intrinsic::ctlz: {
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleLZ = Known2.One.countLeadingZeros();
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
+ unsigned LowBits = Log2_32(PossibleLZ)+1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
case Intrinsic::cttz: {
- unsigned LowBits = Log2_32(BitWidth)+1;
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleTZ = Known2.One.countTrailingZeros();
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
- LowBits -= 1;
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
+ unsigned LowBits = Log2_32(PossibleTZ)+1;
+ Known.Zero.setBitsFrom(LowBits);
break;
}
case Intrinsic::ctpop: {
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// We can bound the space the count needs. Also, bits known to be zero
// can't contribute to the population.
- unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
- unsigned LeadingZeros =
- APInt(BitWidth, BitsPossiblySet).countLeadingZeros();
- assert(LeadingZeros <= BitWidth);
- KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros);
- KnownOne &= ~KnownZero;
+ unsigned BitsPossiblySet = Known2.countMaxPopulation();
+ unsigned LowBits = Log2_32(BitsPossiblySet)+1;
+ Known.Zero.setBitsFrom(LowBits);
// TODO: we could bound KnownOne using the lower bound on the number
// of bits which might be set provided by popcnt KnownOne2.
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero |= APInt::getHighBitsSet(64, 32);
+ Known.Zero.setBitsFrom(32);
break;
}
}
@@ -1456,7 +1439,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// tracking the specific element. But at least we might find information
// valid for all elements of the vector (for example if vector is sign
// extended, shifted, etc).
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
case Instruction::ExtractValue:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
@@ -1468,20 +1451,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
- II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, Depth, Q);
+ II->getArgOperand(1), false, Known, Known2,
+ Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
- II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, Depth, Q);
+ II->getArgOperand(1), false, Known, Known2,
+ Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
- KnownZero, KnownOne, KnownZero2, KnownOne2, Depth,
- Q);
+ Known, Known2, Depth, Q);
break;
}
}
@@ -1490,7 +1472,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
/// Determine which bits of V are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bit sets.
+/// them.
+KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
+ KnownBits Known(getBitWidth(V->getType(), Q.DL));
+ computeKnownBits(V, Known, Depth, Q);
+ return Known;
+}
+
+/// Determine which bits of V are known to be either zero or one and return
+/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
@@ -1504,33 +1494,29 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth, const Query &Q) {
+void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
+ const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() ||
- V->getType()->getScalarType()->isPointerTy()) &&
+ assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
+ V->getType()->isPtrOrPtrVectorTy()) &&
"Not integer or pointer type!");
- assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
- (!V->getType()->isIntOrIntVectorTy() ||
- V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "V, KnownOne and KnownZero should have same BitWidth");
+ assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth &&
+ "V and Known should have same BitWidth");
+ (void)BitWidth;
const APInt *C;
if (match(V, m_APInt(C))) {
// We know all of the bits for a scalar constant or a splat vector constant!
- KnownOne = *C;
- KnownZero = ~KnownOne;
+ Known.One = *C;
+ Known.Zero = ~Known.One;
return;
}
// Null and aggregate-zero are all-zeros.
if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
- KnownOne.clearAllBits();
- KnownZero = APInt::getAllOnesValue(BitWidth);
+ Known.setAllZero();
return;
}
// Handle a constant vector by taking the intersection of the known bits of
@@ -1538,12 +1524,12 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
// We know that CDS must be a vector of integers. Take the intersection of
// each element.
- KnownZero.setAllBits(); KnownOne.setAllBits();
- APInt Elt(KnownZero.getBitWidth(), 0);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ APInt Elt(BitWidth, 0);
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
Elt = CDS->getElementAsInteger(i);
- KnownZero &= ~Elt;
- KnownOne &= Elt;
+ Known.Zero &= ~Elt;
+ Known.One &= Elt;
}
return;
}
@@ -1551,25 +1537,24 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
if (const auto *CV = dyn_cast<ConstantVector>(V)) {
// We know that CV must be a vector of integers. Take the intersection of
// each element.
- KnownZero.setAllBits(); KnownOne.setAllBits();
- APInt Elt(KnownZero.getBitWidth(), 0);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ APInt Elt(BitWidth, 0);
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
Constant *Element = CV->getAggregateElement(i);
auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
if (!ElementCI) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ Known.resetAll();
return;
}
Elt = ElementCI->getValue();
- KnownZero &= ~Elt;
- KnownOne &= Elt;
+ Known.Zero &= ~Elt;
+ Known.One &= Elt;
}
return;
}
// Start out not knowing anything.
- KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ Known.resetAll();
// We can't imply anything about undefs.
if (isa<UndefValue>(V))
@@ -1588,44 +1573,27 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
// the bits of its aliasee.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->isInterposable())
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
return;
}
if (const Operator *I = dyn_cast<Operator>(V))
- computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q);
+ computeKnownBitsFromOperator(I, Known, Depth, Q);
- // Aligned pointers have trailing zeros - refine KnownZero set
+ // Aligned pointers have trailing zeros - refine Known.Zero set
if (V->getType()->isPointerTy()) {
unsigned Align = V->getPointerAlignment(Q.DL);
if (Align)
- KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ Known.Zero.setLowBits(countTrailingZeros(Align));
}
- // computeKnownBitsFromAssume strictly refines KnownZero and
- // KnownOne. Therefore, we run them after computeKnownBitsFromOperator.
+ // computeKnownBitsFromAssume strictly refines Known.
+ // Therefore, we run them after computeKnownBitsFromOperator.
// Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q);
+ computeKnownBitsFromAssume(V, Known, Depth, Q);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-}
-
-/// Determine whether the sign bit is known to be zero or one.
-/// Convenience wrapper around computeKnownBits.
-void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
- unsigned Depth, const Query &Q) {
- unsigned BitWidth = getBitWidth(V->getType(), Q.DL);
- if (!BitWidth) {
- KnownZero = false;
- KnownOne = false;
- return;
- }
- APInt ZeroBits(BitWidth, 0);
- APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, Depth, Q);
- KnownOne = OneBits[BitWidth - 1];
- KnownZero = ZeroBits[BitWidth - 1];
+ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
/// Return true if the given value is known to have exactly one
@@ -1648,9 +1616,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
if (match(V, m_Shl(m_One(), m_Value())))
return true;
- // (signbit) >>l X is clearly a power of two if the one is not shifted off the
- // bottom. If it is shifted off the bottom then the result is undefined.
- if (match(V, m_LShr(m_SignBit(), m_Value())))
+ // (signmask) >>l X is clearly a power of two if the one is not shifted off
+ // the bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignMask(), m_Value())))
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
@@ -1697,18 +1665,18 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
- APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, Depth, Q);
+ KnownBits LHSBits(BitWidth);
+ computeKnownBits(X, LHSBits, Depth, Q);
- APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, Depth, Q);
+ KnownBits RHSBits(BitWidth);
+ computeKnownBits(Y, RHSBits, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
- if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2())
+ if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
// If OrZero isn't set, we cannot give back a zero result.
// Make sure either the LHS or RHS has a bit set.
- if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue())
+ if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
return true;
}
}
@@ -1805,10 +1773,12 @@ static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value)
return true;
}
-/// Return true if the given value is known to be non-zero when defined.
-/// For vectors return true if every element is known to be non-zero when
-/// defined. Supports values with integer or pointer type and vectors of
-/// integers.
+/// Return true if the given value is known to be non-zero when defined. For
+/// vectors, return true if every element is known to be non-zero when
+/// defined. For pointers, if the context instruction and dominator tree are
+/// specified, perform context-sensitive analysis and return true if the
+/// pointer couldn't possibly be null at the specified instruction.
+/// Supports values with integer or pointer type and vectors of integers.
bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (auto *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
@@ -1851,7 +1821,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
- if (isKnownNonNull(V))
+ if (isKnownNonNullAt(V, Q.CxtI, Q.DT))
return true;
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
@@ -1871,16 +1841,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
- if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, Depth, Q);
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q);
- if (KnownOne[0])
+ KnownBits Known(BitWidth);
+ computeKnownBits(X, Known, Depth, Q);
+ if (Known.One[0])
return true;
}
// shr X, Y != 0 if X is negative. Note that the value of the shift is not
@@ -1891,25 +1860,20 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (BO->isExact())
return isKnownNonZero(X, Depth, Q);
- bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q);
- if (XKnownNegative)
+ KnownBits Known = computeKnownBits(X, Depth, Q);
+ if (Known.isNegative())
return true;
// If the shifter operand is a constant, and all of the bits shifted
// out are known to be zero, and X is known non-zero then at least one
// non-zero bit must remain.
if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q);
-
auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
// Is there a known one in the portion not shifted out?
- if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal)
+ if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
return true;
// Are all the bits to be shifted out known zero?
- if (KnownZero.countTrailingOnes() >= ShiftVal)
+ if (Known.countMinTrailingZeros() >= ShiftVal)
return isKnownNonZero(X, Depth, Q);
}
}
@@ -1919,40 +1883,34 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
- bool XKnownNonNegative, XKnownNegative;
- bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Depth, Q);
+ KnownBits XKnown = computeKnownBits(X, Depth, Q);
+ KnownBits YKnown = computeKnownBits(Y, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
- if (XKnownNonNegative && YKnownNonNegative)
+ if (XKnown.isNonNegative() && YKnown.isNonNegative())
if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
// zero unless both X and Y equal INT_MIN.
- if (BitWidth && XKnownNegative && YKnownNegative) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
+ if (XKnown.isNegative() && YKnown.isNegative()) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q);
- if ((KnownOne & Mask) != 0)
+ if (XKnown.One.intersects(Mask))
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, Depth, Q);
- if ((KnownOne & Mask) != 0)
+ if (YKnown.One.intersects(Mask))
return true;
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative &&
+ if (XKnown.isNonNegative() &&
isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
return true;
- if (YKnownNonNegative &&
+ if (YKnown.isNonNegative() &&
isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
return true;
}
@@ -1992,17 +1950,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
// Check if all incoming values are non-zero constant.
bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
- return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZeroValue();
+ return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
});
if (AllNonZeroConstants)
return true;
}
- if (!BitWidth) return false;
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
- return KnownOne != 0;
+ KnownBits Known(BitWidth);
+ computeKnownBits(V, Known, Depth, Q);
+ return Known.One != 0;
}
/// Return true if V2 == V1 + X, where X is known non-zero.
@@ -2022,7 +1978,7 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
/// Return true if it is known that V1 != V2.
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
- if (V1->getType()->isVectorTy() || V1 == V2)
+ if (V1 == V2)
return false;
if (V1->getType() != V2->getType())
// We can't look through casts yet.
@@ -2030,19 +1986,14 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
return true;
- if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+ if (V1->getType()->isIntOrIntVectorTy()) {
// Are any known bits in V1 contradictory to known bits in V2? If V1
// has a known zero where V2 has a known one, they must not be equal.
- auto BitWidth = Ty->getBitWidth();
- APInt KnownZero1(BitWidth, 0);
- APInt KnownOne1(BitWidth, 0);
- computeKnownBits(V1, KnownZero1, KnownOne1, 0, Q);
- APInt KnownZero2(BitWidth, 0);
- APInt KnownOne2(BitWidth, 0);
- computeKnownBits(V2, KnownZero2, KnownOne2, 0, Q);
-
- auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
- if (OppositeBits.getBoolValue())
+ KnownBits Known1 = computeKnownBits(V1, 0, Q);
+ KnownBits Known2 = computeKnownBits(V2, 0, Q);
+
+ if (Known1.Zero.intersects(Known2.One) ||
+ Known2.Zero.intersects(Known1.One))
return true;
}
return false;
@@ -2059,9 +2010,9 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
/// for all of the elements in the vector.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q) {
- APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
- return (KnownZero & Mask) == Mask;
+ KnownBits Known(Mask.getBitWidth());
+ computeKnownBits(V, Known, Depth, Q);
+ return Mask.isSubsetOf(Known.Zero);
}
/// For vector constants, loop over the elements and find the constant with the
@@ -2092,13 +2043,29 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V,
return MinSignBits;
}
+static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
+ const Query &Q);
+
+static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
+ const Query &Q) {
+ unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
+ assert(Result > 0 && "At least one sign bit needs to be present!");
+ return Result;
+}
+
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign bit
/// (itself), but other cases can give us information. For example, immediately
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the mininum number of known sign bits.
-unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
+static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
+ const Query &Q) {
+
+ // We return the minimum number of sign bits that are guaranteed to be present
+ // in V, so for undef we have to conservatively return 1. We don't have the
+ // same behavior for poison though -- that's a FIXME today.
+
unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -2174,7 +2141,10 @@ unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
- Tmp += ShAmt->getZExtValue();
+ unsigned ShAmtLimited = ShAmt->getZExtValue();
+ if (ShAmtLimited >= TyBits)
+ break; // Bad shift.
+ Tmp += ShAmtLimited;
if (Tmp > TyBits) Tmp = TyBits;
}
return Tmp;
@@ -2220,17 +2190,17 @@ unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
// Special case decrementing a value (ADD X, -1):
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnesValue())
return TyBits;
// If we are subtracting one from a positive number, there is no carry
// out of the result.
- if (KnownZero.isNegative())
+ if (Known.isNonNegative())
return Tmp;
}
@@ -2245,16 +2215,16 @@ unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
// Handle NEG.
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnesValue())
return TyBits;
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
- if (KnownZero.isNegative())
+ if (Known.isNonNegative())
return Tmp2;
// Otherwise, we treat this like a SUB.
@@ -2306,19 +2276,12 @@ unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
return VecSignBits;
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
+ KnownBits Known(TyBits);
+ computeKnownBits(V, Known, Depth, Q);
// If we know that the sign bit is either zero or one, determine the number of
// identical bits in the top of the input value.
- if (KnownZero.isNegative())
- return std::max(FirstAnswer, KnownZero.countLeadingOnes());
-
- if (KnownOne.isNegative())
- return std::max(FirstAnswer, KnownOne.countLeadingOnes());
-
- // computeKnownBits gave us no extra information about the top bits.
- return FirstAnswer;
+ return std::max(FirstAnswer, Known.countMinSignBits());
}
/// This function computes the integer multiple of Base that equals V.
@@ -2368,6 +2331,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
case Instruction::SExt:
if (!LookThroughSExt) return false;
// otherwise fall through to ZExt
+ LLVM_FALLTHROUGH;
case Instruction::ZExt:
return ComputeMultiple(I->getOperand(0), Base, Multiple,
LookThroughSExt, Depth+1);
@@ -2453,7 +2417,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
if (!TLI)
return Intrinsic::not_intrinsic;
- LibFunc::Func Func;
+ LibFunc Func;
// We're going to make assumptions on the semantics of the functions, check
// that the target knows that it's available in this environment and it does
// not have local linkage.
@@ -2468,81 +2432,81 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
switch (Func) {
default:
break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
return Intrinsic::sin;
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
return Intrinsic::cos;
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
return Intrinsic::exp;
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
return Intrinsic::exp2;
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::logl:
+ case LibFunc_log:
+ case LibFunc_logf:
+ case LibFunc_logl:
return Intrinsic::log;
- case LibFunc::log10:
- case LibFunc::log10f:
- case LibFunc::log10l:
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10l:
return Intrinsic::log10;
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
return Intrinsic::log2;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
return Intrinsic::fabs;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
return Intrinsic::minnum;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
return Intrinsic::maxnum;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
return Intrinsic::copysign;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
return Intrinsic::floor;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
return Intrinsic::ceil;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
return Intrinsic::trunc;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
return Intrinsic::rint;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
return Intrinsic::nearbyint;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
return Intrinsic::round;
- case LibFunc::pow:
- case LibFunc::powf:
- case LibFunc::powl:
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_powl:
return Intrinsic::pow;
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
if (ICS->hasNoNaNs())
return Intrinsic::sqrt;
return Intrinsic::not_intrinsic;
@@ -2607,6 +2571,11 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
const TargetLibraryInfo *TLI,
bool SignBitOnly,
unsigned Depth) {
+ // TODO: This function does not do the right thing when SignBitOnly is true
+ // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
+ // which flips the sign bits of NaNs. See
+ // https://llvm.org/bugs/show_bug.cgi?id=31702.
+
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
return !CFP->getValueAPF().isNegative() ||
(!SignBitOnly && CFP->getValueAPF().isZero());
@@ -2650,7 +2619,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1);
case Instruction::Call:
- Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
+ const auto *CI = cast<CallInst>(I);
+ Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
switch (IID) {
default:
break;
@@ -2667,16 +2637,37 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::fabs:
- case Intrinsic::sqrt:
return true;
+
+ case Intrinsic::sqrt:
+ // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
+ if (!SignBitOnly)
+ return true;
+ return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
+ CannotBeNegativeZero(CI->getOperand(0), TLI));
+
case Intrinsic::powi:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
// powi(x,n) is non-negative if n is even.
- if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+ if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
return true;
}
+ // TODO: This is not correct. Given that exp is an integer, here are the
+ // ways that pow can return a negative value:
+ //
+ // pow(x, exp) --> negative if exp is odd and x is negative.
+ // pow(-0, exp) --> -inf if exp is negative odd.
+ // pow(-0, exp) --> -0 if exp is positive odd.
+ // pow(-inf, exp) --> -0 if exp is negative odd.
+ // pow(-inf, exp) --> -inf if exp is positive odd.
+ //
+ // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
+ // but we must return false if x == -0. Unfortunately we do not currently
+ // have a way of expressing this constraint. See details in
+ // https://llvm.org/bugs/show_bug.cgi?id=31702.
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1);
+
case Intrinsic::fma:
case Intrinsic::fmuladd:
// x*x+y is non-negative if y is non-negative.
@@ -2970,14 +2961,16 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
return Ptr;
}
-bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) {
+bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
+ unsigned CharSize) {
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return false;
- // Make sure the index-ee is a pointer to array of i8.
+ // Make sure the index-ee is a pointer to array of \p CharSize integers.
+ // CharSize.
ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
- if (!AT || !AT->getElementType()->isIntegerTy(8))
+ if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -2989,11 +2982,9 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) {
return true;
}
-/// This function computes the length of a null-terminated C string pointed to
-/// by V. If successful, it returns true and returns the string in Str.
-/// If unsuccessful, it returns false.
-bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
- uint64_t Offset, bool TrimAtNul) {
+bool llvm::getConstantDataArrayInfo(const Value *V,
+ ConstantDataArraySlice &Slice,
+ unsigned ElementSize, uint64_t Offset) {
assert(V);
// Look through bitcast instructions and geps.
@@ -3004,7 +2995,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// The GEP operator should be based on a pointer to string constant, and is
// indexing into the string constant.
- if (!isGEPBasedOnPointerToString(GEP))
+ if (!isGEPBasedOnPointerToString(GEP, ElementSize))
return false;
// If the second index isn't a ConstantInt, then this is a variable index
@@ -3015,8 +3006,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
- TrimAtNul);
+ return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
+ StartIdx + Offset);
}
// The GEP instruction, constant or instruction, must reference a global
@@ -3026,30 +3017,72 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
- // Handle the all-zeros case.
+ const ConstantDataArray *Array;
+ ArrayType *ArrayTy;
if (GV->getInitializer()->isNullValue()) {
- // This is a degenerate case. The initializer is constant zero so the
- // length of the string must be zero.
- Str = "";
- return true;
+ Type *GVTy = GV->getValueType();
+ if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
+ // A zeroinitializer for the array; there is no ConstantDataArray.
+ Array = nullptr;
+ } else {
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy);
+ uint64_t Length = SizeInBytes / (ElementSize / 8);
+ if (Length <= Offset)
+ return false;
+
+ Slice.Array = nullptr;
+ Slice.Offset = 0;
+ Slice.Length = Length - Offset;
+ return true;
+ }
+ } else {
+ // This must be a ConstantDataArray.
+ Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
+ if (!Array)
+ return false;
+ ArrayTy = Array->getType();
}
+ if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
+ return false;
- // This must be a ConstantDataArray.
- const auto *Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
- if (!Array || !Array->isString())
+ uint64_t NumElts = ArrayTy->getArrayNumElements();
+ if (Offset > NumElts)
return false;
- // Get the number of elements in the array.
- uint64_t NumElts = Array->getType()->getArrayNumElements();
+ Slice.Array = Array;
+ Slice.Offset = Offset;
+ Slice.Length = NumElts - Offset;
+ return true;
+}
- // Start out with the entire array in the StringRef.
- Str = Array->getAsString();
+/// This function computes the length of a null-terminated C string pointed to
+/// by V. If successful, it returns true and returns the string in Str.
+/// If unsuccessful, it returns false.
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset, bool TrimAtNul) {
+ ConstantDataArraySlice Slice;
+ if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
+ return false;
- if (Offset > NumElts)
+ if (Slice.Array == nullptr) {
+ if (TrimAtNul) {
+ Str = StringRef();
+ return true;
+ }
+ if (Slice.Length == 1) {
+ Str = StringRef("", 1);
+ return true;
+ }
+ // We cannot instantiate a StringRef as we do not have an appropriate string
+ // of 0s at hand.
return false;
+ }
+ // Start out with the entire array in the StringRef.
+ Str = Slice.Array->getAsString();
// Skip over 'offset' bytes.
- Str = Str.substr(Offset);
+ Str = Str.substr(Slice.Offset);
if (TrimAtNul) {
// Trim off the \0 and anything after it. If the array is not nul
@@ -3067,7 +3100,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLengthH(const Value *V,
- SmallPtrSetImpl<const PHINode*> &PHIs) {
+ SmallPtrSetImpl<const PHINode*> &PHIs,
+ unsigned CharSize) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
@@ -3080,7 +3114,7 @@ static uint64_t GetStringLengthH(const Value *V,
// If it was new, see if all the input strings are the same length.
uint64_t LenSoFar = ~0ULL;
for (Value *IncValue : PN->incoming_values()) {
- uint64_t Len = GetStringLengthH(IncValue, PHIs);
+ uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
if (Len == 0) return 0; // Unknown length -> unknown.
if (Len == ~0ULL) continue;
@@ -3096,9 +3130,9 @@ static uint64_t GetStringLengthH(const Value *V,
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
- uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
if (Len1 == 0) return 0;
- uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
if (Len2 == 0) return 0;
if (Len1 == ~0ULL) return Len2;
if (Len2 == ~0ULL) return Len1;
@@ -3107,20 +3141,30 @@ static uint64_t GetStringLengthH(const Value *V,
}
// Otherwise, see if we can read the string.
- StringRef StrData;
- if (!getConstantStringInfo(V, StrData))
+ ConstantDataArraySlice Slice;
+ if (!getConstantDataArrayInfo(V, Slice, CharSize))
return 0;
- return StrData.size()+1;
+ if (Slice.Array == nullptr)
+ return 1;
+
+ // Search for nul characters
+ unsigned NullIndex = 0;
+ for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
+ if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
+ break;
+ }
+
+ return NullIndex + 1;
}
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-uint64_t llvm::GetStringLength(const Value *V) {
+uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
if (!V->getType()->isPointerTy()) return 0;
SmallPtrSet<const PHINode*, 32> PHIs;
- uint64_t Len = GetStringLengthH(V, PHIs);
+ uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
// an empty string as a length.
return Len == ~0ULL ? 1 : Len;
@@ -3167,6 +3211,9 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
if (GA->isInterposable())
return V;
V = GA->getAliasee();
+ } else if (isa<AllocaInst>(V)) {
+ // An alloca can't be further simplified.
+ return V;
} else {
if (auto CS = CallSite(V))
if (Value *RV = CS.getReturnedArgOperand()) {
@@ -3177,7 +3224,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
// TODO: Acquire a DominatorTree and AssumptionCache and use them.
- if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) {
+ if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
V = Simplified;
continue;
}
@@ -3230,6 +3277,69 @@ void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
} while (!Worklist.empty());
}
+/// This is the function that does the work of looking through basic
+/// ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant, a multiplied value, or a phi, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (true);
+}
+
+/// This is a wrapper around GetUnderlyingObjects and adds support for basic
+/// ptrtoint+arithmetic+inttoptr sequences.
+void llvm::getUnderlyingObjectsForCodeGen(const Value *V,
+ SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL) {
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
+ do {
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+
+ for (Value *V : Objs) {
+ if (!Visited.insert(V).second)
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ // If GetUnderlyingObjects fails to find an identifiable object,
+ // getUnderlyingObjectsForCodeGen also fails for safety.
+ if (!isIdentifiedObject(V)) {
+ Objects.clear();
+ return;
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
+}
+
/// Return true if the only users of this pointer are lifetime markers.
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
for (const User *U : V->users()) {
@@ -3298,59 +3408,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
LI->getAlignment(), DL, CtxI, DT);
}
case Instruction::Call: {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- // These synthetic intrinsics have no side-effects and just mark
- // information about their operands.
- // FIXME: There are other no-op synthetic instructions that potentially
- // should be considered at least *safe* to speculate...
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- return true;
+ auto *CI = cast<const CallInst>(Inst);
+ const Function *Callee = CI->getCalledFunction();
- case Intrinsic::bitreverse:
- case Intrinsic::bswap:
- case Intrinsic::ctlz:
- case Intrinsic::ctpop:
- case Intrinsic::cttz:
- case Intrinsic::objectsize:
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::umul_with_overflow:
- case Intrinsic::usub_with_overflow:
- return true;
- // These intrinsics are defined to have the same behavior as libm
- // functions except for setting errno.
- case Intrinsic::sqrt:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return true;
- // These intrinsics are defined to have the same behavior as libm
- // functions, and the corresponding libm functions never set errno.
- case Intrinsic::trunc:
- case Intrinsic::copysign:
- case Intrinsic::fabs:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- return true;
- // These intrinsics are defined to have the same behavior as libm
- // functions, which never overflow when operating on the IEEE754 types
- // that we support, and never set errno otherwise.
- case Intrinsic::ceil:
- case Intrinsic::floor:
- case Intrinsic::nearbyint:
- case Intrinsic::rint:
- case Intrinsic::round:
- return true;
- // TODO: are convert_{from,to}_fp16 safe?
- // TODO: can we list target-specific intrinsics here?
- default: break;
- }
- }
- return false; // The called function could have undefined behavior or
- // side-effects, even if marked readnone nounwind.
+ // The called function could have undefined behavior or side-effects, even
+ // if marked readnone nounwind.
+ return Callee && Callee->isSpeculatable();
}
case Instruction::VAArg:
case Instruction::Alloca:
@@ -3423,6 +3486,16 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
if (NumUsesExplored >= DomConditionsMaxUses)
break;
NumUsesExplored++;
+
+ // If the value is used as an argument to a call or invoke, then argument
+ // attributes may provide an answer about null-ness.
+ if (auto CS = ImmutableCallSite(U))
+ if (auto *CalledFunc = CS.getCalledFunction())
+ for (const Argument &Arg : CalledFunc->args())
+ if (CS.getArgOperand(Arg.getArgNo()) == V &&
+ Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
+ return true;
+
// Consider only compare instructions uniquely controlling a branch
CmpInst::Predicate Pred;
if (!match(const_cast<User *>(U),
@@ -3477,38 +3550,34 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
// we can guarantee that the result does not overflow.
// Ref: "Hacker's Delight" by Henry Warren
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
- DT);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
- DT);
+ KnownBits LHSKnown(BitWidth);
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
// Note that underestimating the number of zero bits gives a more
// conservative answer.
- unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
- RHSKnownZero.countLeadingOnes();
+ unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
+ RHSKnown.countMinLeadingZeros();
// First handle the easy case: if we have enough zero bits there's
// definitely no overflow.
if (ZeroBits >= BitWidth)
return OverflowResult::NeverOverflows;
// Get the largest possible values for each operand.
- APInt LHSMax = ~LHSKnownZero;
- APInt RHSMax = ~RHSKnownZero;
+ APInt LHSMax = ~LHSKnown.Zero;
+ APInt RHSMax = ~RHSKnown.Zero;
// We know the multiply operation doesn't overflow if the maximum values for
// each operand will not overflow after we multiply them together.
bool MaxOverflow;
- LHSMax.umul_ov(RHSMax, MaxOverflow);
+ (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
if (!MaxOverflow)
return OverflowResult::NeverOverflows;
// We know it always overflows if multiplying the smallest possible values for
// the operands also results in overflow.
bool MinOverflow;
- LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+ (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
if (MinOverflow)
return OverflowResult::AlwaysOverflows;
@@ -3521,21 +3590,17 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
- if (LHSKnownNonNegative || LHSKnownNegative) {
- bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
-
- if (LHSKnownNegative && RHSKnownNegative) {
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+
+ if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
// The sign bit is set in both cases: this MUST overflow.
// Create a simple add instruction, and insert it into the struct.
return OverflowResult::AlwaysOverflows;
}
- if (LHSKnownNonNegative && RHSKnownNonNegative) {
+ if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
// The sign bit is clear in both cases: this CANNOT overflow.
// Create a simple add instruction, and insert it into the struct.
return OverflowResult::NeverOverflows;
@@ -3545,6 +3610,51 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
return OverflowResult::MayOverflow;
}
+/// \brief Return true if we can prove that adding the two values of the
+/// knownbits will not overflow.
+/// Otherwise return false.
+static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
+ const KnownBits &RHSKnown) {
+ // Addition of two 2's complement numbers having opposite signs will never
+ // overflow.
+ if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
+ (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
+ return true;
+
+ // If either of the values is known to be non-negative, adding them can only
+ // overflow if the second is also non-negative, so we can assume that.
+ // Two non-negative numbers will only overflow if there is a carry to the
+ // sign bit, so we can check if even when the values are as big as possible
+ // there is no overflow to the sign bit.
+ if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
+ APInt MaxLHS = ~LHSKnown.Zero;
+ MaxLHS.clearSignBit();
+ APInt MaxRHS = ~RHSKnown.Zero;
+ MaxRHS.clearSignBit();
+ APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
+ return Result.isSignBitClear();
+ }
+
+ // If either of the values is known to be negative, adding them can only
+ // overflow if the second is also negative, so we can assume that.
+ // Two negative number will only overflow if there is no carry to the sign
+ // bit, so we can check if even when the values are as small as possible
+ // there is overflow to the sign bit.
+ if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
+ APInt MinLHS = LHSKnown.One;
+ MinLHS.clearSignBit();
+ APInt MinRHS = RHSKnown.One;
+ MinRHS.clearSignBit();
+ APInt Result = std::move(MinLHS) + std::move(MinRHS);
+ return Result.isSignBitSet();
+ }
+
+ // If we reached here it means that we know nothing about the sign bits.
+ // In this case we can't know if there will be an overflow, since by
+ // changing the sign bits any two values can be made to overflow.
+ return false;
+}
+
static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
const Value *RHS,
const AddOperator *Add,
@@ -3556,18 +3666,29 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
return OverflowResult::NeverOverflows;
}
- bool LHSKnownNonNegative, LHSKnownNegative;
- bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
+ // If LHS and RHS each have at least two sign bits, the addition will look
+ // like
+ //
+ // XX..... +
+ // YY.....
+ //
+ // If the carry into the most significant position is 0, X and Y can't both
+ // be 1 and therefore the carry out of the addition is also 0.
+ //
+ // If the carry into the most significant position is 1, X and Y can't both
+ // be 0 and therefore the carry out of the addition is also 1.
+ //
+ // Since the carry into the most significant position is always equal to
+ // the carry out of the addition, there is no signed overflow.
+ if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
+ ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
+ return OverflowResult::NeverOverflows;
+
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if ((LHSKnownNonNegative && RHSKnownNegative) ||
- (LHSKnownNegative && RHSKnownNonNegative)) {
- // The sign bits are opposite: this CANNOT overflow.
+ if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
return OverflowResult::NeverOverflows;
- }
// The remaining code needs Add to be available. Early returns if not so.
if (!Add)
@@ -3578,14 +3699,13 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
// @llvm.assume'ed non-negative rather than proved so from analyzing its
// operands.
bool LHSOrRHSKnownNonNegative =
- (LHSKnownNonNegative || RHSKnownNonNegative);
- bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+ (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
+ bool LHSOrRHSKnownNegative =
+ (LHSKnown.isNegative() || RHSKnown.isNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
- bool AddKnownNonNegative, AddKnownNegative;
- ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
- /*Depth=*/0, AC, CxtI, DT);
- if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
- (AddKnownNegative && LHSOrRHSKnownNegative)) {
+ KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
+ if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
+ (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
return OverflowResult::NeverOverflows;
}
}
@@ -3700,6 +3820,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
return false;
if (isa<ReturnInst>(I))
return false;
+ if (isa<UnreachableInst>(I))
+ return false;
// Calls can throw, or contain an infinite loop, or kill the process.
if (auto CS = ImmutableCallSite(I)) {
@@ -3748,79 +3870,33 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
bool llvm::propagatesFullPoison(const Instruction *I) {
switch (I->getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Xor:
- case Instruction::Trunc:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- // These operations all propagate poison unconditionally. Note that poison
- // is not any particular value, so xor or subtraction of poison with
- // itself still yields poison, not zero.
- return true;
-
- case Instruction::AShr:
- case Instruction::SExt:
- // For these operations, one bit of the input is replicated across
- // multiple output bits. A replicated poison bit is still poison.
- return true;
-
- case Instruction::Shl: {
- // Left shift *by* a poison value is poison. The number of
- // positions to shift is unsigned, so no negative values are
- // possible there. Left shift by zero places preserves poison. So
- // it only remains to consider left shift of poison by a positive
- // number of places.
- //
- // A left shift by a positive number of places leaves the lowest order bit
- // non-poisoned. However, if such a shift has a no-wrap flag, then we can
- // make the poison operand violate that flag, yielding a fresh full-poison
- // value.
- auto *OBO = cast<OverflowingBinaryOperator>(I);
- return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
- }
-
- case Instruction::Mul: {
- // A multiplication by zero yields a non-poison zero result, so we need to
- // rule out zero as an operand. Conservatively, multiplication by a
- // non-zero constant is not multiplication by zero.
- //
- // Multiplication by a non-zero constant can leave some bits
- // non-poisoned. For example, a multiplication by 2 leaves the lowest
- // order bit unpoisoned. So we need to consider that.
- //
- // Multiplication by 1 preserves poison. If the multiplication has a
- // no-wrap flag, then we can make the poison operand violate that flag
- // when multiplied by any integer other than 0 and 1.
- auto *OBO = cast<OverflowingBinaryOperator>(I);
- if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
- for (Value *V : OBO->operands()) {
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- // A ConstantInt cannot yield poison, so we can assume that it is
- // the other operand that is poison.
- return !CI->isZero();
- }
- }
- }
- return false;
- }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::GetElementPtr:
+ // These operations all propagate poison unconditionally. Note that poison
+ // is not any particular value, so xor or subtraction of poison with
+ // itself still yields poison, not zero.
+ return true;
- case Instruction::ICmp:
- // Comparing poison with any value yields poison. This is why, for
- // instance, x s< (x +nsw 1) can be folded to true.
- return true;
+ case Instruction::AShr:
+ case Instruction::SExt:
+ // For these operations, one bit of the input is replicated across
+ // multiple output bits. A replicated poison bit is still poison.
+ return true;
- case Instruction::GetElementPtr:
- // A GEP implicitly represents a sequence of additions, subtractions,
- // truncations, sign extensions and multiplications. The multiplications
- // are by the non-zero sizes of some set of types, so we do not have to be
- // concerned with multiplication by zero. If the GEP is in-bounds, then
- // these operations are implicitly no-signed-wrap so poison is propagated
- // by the arguments above for Add, Sub, Trunc, SExt and Mul.
- return cast<GEPOperator>(I)->isInBounds();
+ case Instruction::ICmp:
+ // Comparing poison with any value yields poison. This is why, for
+ // instance, x s< (x +nsw 1) can be folded to true.
+ return true;
- default:
- return false;
+ default:
+ return false;
}
}
@@ -3849,7 +3925,7 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
}
}
-bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
// We currently only look for uses of poison values within the same basic
// block, as that makes it easier to guarantee that the uses will be
// executed given that PoisonI is executed.
@@ -3923,6 +3999,37 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS) {
+ // Assume success. If there's no match, callers should not use these anyway.
+ LHS = TrueVal;
+ RHS = FalseVal;
+
+ // Recognize variations of:
+ // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
+ const APInt *C1;
+ if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
+ const APInt *C2;
+
+ // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
+ if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
+ return {SPF_SMAX, SPNB_NA, false};
+
+ // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
+ if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
+ return {SPF_SMIN, SPNB_NA, false};
+
+ // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
+ if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
+ return {SPF_UMAX, SPNB_NA, false};
+
+ // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
+ if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
+ return {SPF_UMIN, SPNB_NA, false};
+ }
+
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -3930,23 +4037,16 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
// (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
if (match(TrueVal, m_Zero()) &&
- match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
- }
// Z = X -nsw Y
// (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
// (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
if (match(FalseVal, m_Zero()) &&
- match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
- }
- const APInt *C1;
if (!match(CmpRHS, m_APInt(C1)))
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -3957,41 +4057,29 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// Is the sign bit set?
// (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
// (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
- if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) {
- LHS = TrueVal;
- RHS = FalseVal;
+ if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue())
return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
- }
// Is the sign bit clear?
// (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
// (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
- C2->isMinSignedValue()) {
- LHS = TrueVal;
- RHS = FalseVal;
+ C2->isMinSignedValue())
return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
- }
}
// Look through 'not' ops to find disguised signed min/max.
// (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
// (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
- match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2)
return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
- }
// (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
// (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
- match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2)
return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
- }
return {SPF_UNKNOWN, SPNB_NA, false};
}
@@ -4118,58 +4206,64 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
Instruction::CastOps *CastOp) {
- CastInst *CI = dyn_cast<CastInst>(V1);
- Constant *C = dyn_cast<Constant>(V2);
- if (!CI)
+ auto *Cast1 = dyn_cast<CastInst>(V1);
+ if (!Cast1)
return nullptr;
- *CastOp = CI->getOpcode();
-
- if (auto *CI2 = dyn_cast<CastInst>(V2)) {
- // If V1 and V2 are both the same cast from the same type, we can look
- // through V1.
- if (CI2->getOpcode() == CI->getOpcode() &&
- CI2->getSrcTy() == CI->getSrcTy())
- return CI2->getOperand(0);
- return nullptr;
- } else if (!C) {
+
+ *CastOp = Cast1->getOpcode();
+ Type *SrcTy = Cast1->getSrcTy();
+ if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
+ // If V1 and V2 are both the same cast from the same type, look through V1.
+ if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
+ return Cast2->getOperand(0);
return nullptr;
}
- Constant *CastedTo = nullptr;
-
- if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
- CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy());
-
- if (isa<SExtInst>(CI) && CmpI->isSigned())
- CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy(), true);
-
- if (isa<TruncInst>(CI))
- CastedTo = ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
-
- if (isa<FPTruncInst>(CI))
- CastedTo = ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
-
- if (isa<FPExtInst>(CI))
- CastedTo = ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
-
- if (isa<FPToUIInst>(CI))
- CastedTo = ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
-
- if (isa<FPToSIInst>(CI))
- CastedTo = ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
-
- if (isa<UIToFPInst>(CI))
- CastedTo = ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+ auto *C = dyn_cast<Constant>(V2);
+ if (!C)
+ return nullptr;
- if (isa<SIToFPInst>(CI))
- CastedTo = ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+ Constant *CastedTo = nullptr;
+ switch (*CastOp) {
+ case Instruction::ZExt:
+ if (CmpI->isUnsigned())
+ CastedTo = ConstantExpr::getTrunc(C, SrcTy);
+ break;
+ case Instruction::SExt:
+ if (CmpI->isSigned())
+ CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
+ break;
+ case Instruction::Trunc:
+ CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
+ break;
+ case Instruction::FPTrunc:
+ CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
+ break;
+ case Instruction::FPExt:
+ CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true);
+ break;
+ case Instruction::FPToUI:
+ CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true);
+ break;
+ case Instruction::FPToSI:
+ CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true);
+ break;
+ case Instruction::UIToFP:
+ CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true);
+ break;
+ case Instruction::SIToFP:
+ CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true);
+ break;
+ default:
+ break;
+ }
if (!CastedTo)
return nullptr;
- Constant *CastedBack =
- ConstantExpr::getCast(CI->getOpcode(), CastedTo, C->getType(), true);
// Make sure the cast doesn't lose any information.
+ Constant *CastedBack =
+ ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true);
if (CastedBack != C)
return nullptr;
@@ -4253,11 +4347,10 @@ static bool isTruePredicate(CmpInst::Predicate Pred,
// If X & C == 0 then (X | C) == X +_{nuw} C
if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
- unsigned BitWidth = CA->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+ KnownBits Known(CA->getBitWidth());
+ computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT);
- if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+ if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
return true;
}
@@ -4361,35 +4454,64 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
}
Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
- const DataLayout &DL, bool InvertAPred,
+ const DataLayout &DL, bool LHSIsFalse,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
+ // Bail out when we hit the limit.
+ if (Depth == MaxDepth)
+ return None;
+
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for example.
if (LHS->getType() != RHS->getType())
return None;
Type *OpTy = LHS->getType();
- assert(OpTy->getScalarType()->isIntegerTy(1));
+ assert(OpTy->isIntOrIntVectorTy(1));
// LHS ==> RHS by definition
- if (!InvertAPred && LHS == RHS)
- return true;
+ if (LHS == RHS)
+ return !LHSIsFalse;
if (OpTy->isVectorTy())
// TODO: extending the code below to handle vectors
return None;
assert(OpTy->isIntegerTy(1) && "implied by above");
- ICmpInst::Predicate APred, BPred;
- Value *ALHS, *ARHS;
Value *BLHS, *BRHS;
+ ICmpInst::Predicate BPred;
+ // We expect the RHS to be an icmp.
+ if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ return None;
- if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
- !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+ Value *ALHS, *ARHS;
+ ICmpInst::Predicate APred;
+ // The LHS can be an 'or', 'and', or 'icmp'.
+ if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) {
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth == MaxDepth)
+ return None;
+ // If the result of an 'or' is false, then we know both legs of the 'or' are
+ // false. Similarly, if the result of an 'and' is true, then we know both
+ // legs of the 'and' are true.
+ if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
+ (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
+ if (Optional<bool> Implication = isImpliedCondition(
+ ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT))
+ return Implication;
+ if (Optional<bool> Implication = isImpliedCondition(
+ ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT))
+ return Implication;
+ return None;
+ }
return None;
+ }
+ // All of the below logic assumes both LHS and RHS are icmps.
+ assert(isa<ICmpInst>(LHS) && isa<ICmpInst>(RHS) && "Expected icmps.");
- if (InvertAPred)
+ // The rest of the logic assumes the LHS condition is true. If that's not the
+ // case, invert the predicate to make it so.
+ if (LHSIsFalse)
APred = CmpInst::getInversePredicate(APred);
// Can we infer anything when the two compares have matching operands?
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 7e598f4..554d132 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -11,18 +11,19 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/Constants.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -300,7 +301,7 @@ const llvm::Value *llvm::getSplatValue(const Value *V) {
auto *InsertEltInst =
dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
- !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+ !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
return nullptr;
return InsertEltInst->getOperand(1);
@@ -488,3 +489,88 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
+
+Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
+ unsigned NumVecs) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < NumVecs; j++)
+ Mask.push_back(Builder.getInt32(j * VF + i));
+
+ return ConstantVector::get(Mask);
+}
+
+Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned Stride, unsigned VF) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ Mask.push_back(Builder.getInt32(Start + i * Stride));
+
+ return ConstantVector::get(Mask);
+}
+
+Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumInts, unsigned NumUndefs) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumInts; i++)
+ Mask.push_back(Builder.getInt32(Start + i));
+
+ Constant *Undef = UndefValue::get(Builder.getInt32Ty());
+ for (unsigned i = 0; i < NumUndefs; i++)
+ Mask.push_back(Undef);
+
+ return ConstantVector::get(Mask);
+}
+
+/// A helper function for concatenating vectors. This function concatenates two
+/// vectors having the same element type. If the second vector has fewer
+/// elements than the first, it is padded with undefs.
+static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
+ Value *V2) {
+ VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
+ VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
+ assert(VecTy1 && VecTy2 &&
+ VecTy1->getScalarType() == VecTy2->getScalarType() &&
+ "Expect two vectors with the same element type");
+
+ unsigned NumElts1 = VecTy1->getNumElements();
+ unsigned NumElts2 = VecTy2->getNumElements();
+ assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
+
+ if (NumElts1 > NumElts2) {
+ // Extend with UNDEFs.
+ Constant *ExtMask =
+ createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
+ V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
+ }
+
+ Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
+ return Builder.CreateShuffleVector(V1, V2, Mask);
+}
+
+Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
+ unsigned NumVecs = Vecs.size();
+ assert(NumVecs > 1 && "Should be at least two vectors");
+
+ SmallVector<Value *, 8> ResList;
+ ResList.append(Vecs.begin(), Vecs.end());
+ do {
+ SmallVector<Value *, 8> TmpList;
+ for (unsigned i = 0; i < NumVecs - 1; i += 2) {
+ Value *V0 = ResList[i], *V1 = ResList[i + 1];
+ assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
+ "Only the last vector may have a different type");
+
+ TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
+ }
+
+ // Push the last vector if the total number of vectors is odd.
+ if (NumVecs % 2 != 0)
+ TmpList.push_back(ResList[NumVecs - 1]);
+
+ ResList = TmpList;
+ NumVecs = ResList.size();
+ } while (NumVecs > 1);
+
+ return ResList[0];
+}
OpenPOWER on IntegriCloud