diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
196 files changed, 19958 insertions, 7727 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index a736884..bb90861 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -161,8 +161,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(MF); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + BitVector Pristine = MFI.getPristineRegs(MF); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 40451c0..d840a2f 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -48,7 +48,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, }); #ifndef NDEBUG for (unsigned I = 0, E = Hints.size(); I != E; ++I) - assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() && + assert(is_contained(Order, Hints[I]) && "Target hint is outside allocation order."); #endif } diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h index 2aee3a6..8223a52 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.h +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -18,6 +18,7 @@ #define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCRegisterInfo.h" namespace llvm { @@ -79,9 +80,7 @@ public: bool isHint() const { return Pos <= 0; } /// Return true if PhysReg is a preferred register. 
- bool isHint(unsigned PhysReg) const { - return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end(); - } + bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); } }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index d690734..79ecc43 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V, TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); NoopInput = Op; - } else if (isa<CallInst>(I)) { - // Look through call (skipping callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } - } else if (isa<InvokeInst>(I)) { - // Look through invoke (skipping BB, BB, Callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } + } else if (auto CS = ImmutableCallSite(I)) { + const Value *ReturnedOp = CS.getReturnedArgOperand(); + if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI)) + NoopInput = ReturnedOp; } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { // Value may come from either the aggregate or the scalar ArrayRef<unsigned> InsertLoc = IVI->getIndices(); @@ -525,19 +507,15 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } -bool llvm::returnTypeIsEligibleForTailCall(const Function *F, - const Instruction *I, - const ReturnInst *Ret, - const TargetLoweringBase &TLI) { - // If the block ends with a void return or unreachable, it doesn't matter - // what the call's return type is. - if (!Ret || Ret->getNumOperands() == 0) return true; +bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI, + bool *AllowDifferingSizes) { + // ADS may be null, so don't write to it directly. + bool DummyADS; + bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS; + ADS = true; - // If the return value is undef, it doesn't matter what the call's - // return type is. - if (isa<UndefValue>(Ret->getOperand(0))) return true; - - // Make sure the attributes attached to each return are compatible. AttrBuilder CallerAttrs(F->getAttributes(), AttributeSet::ReturnIndex); AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), @@ -545,22 +523,21 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // Noalias is completely benign as far as calling convention goes, it // shouldn't affect whether the call is a tail call. 
- CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); - CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs.removeAttribute(Attribute::NoAlias); - bool AllowDifferingSizes = true; if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) return false; - AllowDifferingSizes = false; + ADS = false; CallerAttrs.removeAttribute(Attribute::ZExt); CalleeAttrs.removeAttribute(Attribute::ZExt); } else if (CallerAttrs.contains(Attribute::SExt)) { if (!CalleeAttrs.contains(Attribute::SExt)) return false; - AllowDifferingSizes = false; + ADS = false; CallerAttrs.removeAttribute(Attribute::SExt); CalleeAttrs.removeAttribute(Attribute::SExt); } @@ -568,7 +545,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // If they're still different, there's some facet we don't understand // (currently only "inreg", but in future who knows). It may be OK but the // only safe option is to reject the tail call. - if (CallerAttrs != CalleeAttrs) + return CallerAttrs == CalleeAttrs; +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + + // Make sure the attributes attached to each return are compatible. + bool AllowDifferingSizes; + if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes)) return false; const Value *RetVal = Ret->getOperand(0), *CallVal = I; @@ -672,7 +666,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) { DenseMap<const MachineBasicBlock *, int> FuncletMembership; // We don't have anything to do if there aren't any EH pads. - if (!MF.getMMI().hasEHFunclets()) + if (!MF.hasEHFunclets()) return FuncletMembership; int EntryBBNumber = MF.front().getNumber(); @@ -694,9 +688,10 @@ llvm::getFuncletMembership(const MachineFunction &MF) { } MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + // CatchPads are not funclets for SEH so do not consider CatchRet to // transfer control to another funclet. - if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) + if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode()) continue; // FIXME: SEH CatchPads are not necessarily in the parent function: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 5294c98..61149d9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -43,13 +43,6 @@ ARMTargetStreamer &ARMException::getTargetStreamer() { return static_cast<ARMTargetStreamer &>(TS); } -/// endModule - Emit all exception information that should come after the -/// content. 
-void ARMException::endModule() { - if (shouldEmitCFI) - Asm->OutStreamer->EmitCFISections(false, true); -} - void ARMException::beginFunction(const MachineFunction *MF) { if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) getTargetStreamer().emitFnStart(); @@ -57,7 +50,14 @@ void ARMException::beginFunction(const MachineFunction *MF) { AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); assert(MoveType != AsmPrinter::CFI_M_EH && "non-EH CFI not yet supported in prologue with EHABI lowering"); + if (MoveType == AsmPrinter::CFI_M_Debug) { + if (!hasEmittedCFISections) { + if (Asm->needsOnlyDebugCFIMoves()) + Asm->OutStreamer->EmitCFISections(false, true); + hasEmittedCFISections = true; + } + shouldEmitCFI = true; Asm->OutStreamer->EmitCFIStartProc(false); } @@ -75,7 +75,7 @@ void ARMException::endFunction(const MachineFunction *MF) { F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); bool shouldEmitPersonality = forceEmitPersonality || - !MMI->getLandingPads().empty(); + !MF->getLandingPads().empty(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && !shouldEmitPersonality) ATS.emitCantUnwind(); @@ -99,8 +99,9 @@ void ARMException::endFunction(const MachineFunction *MF) { } void ARMException::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 8c68383..ec552e0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -14,8 +14,6 @@ using namespace llvm; -class MCExpr; - unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { HasBeenUsed = true; auto IterBool = diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 272bace..24fdbfc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -37,6 +37,8 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" @@ -55,10 +57,15 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -static const char *const DWARFGroupName = "DWARF Emission"; -static const char *const DbgTimerName = "Debug Info Emission"; -static const char *const EHTimerName = "DWARF Exception Writer"; -static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables"; +static const char *const DWARFGroupName = "dwarf"; +static const char *const DWARFGroupDescription = "DWARF Emission"; +static const char *const DbgTimerName = "emit"; +static const char *const DbgTimerDescription = "Debug Info Emission"; +static const char *const EHTimerName = "write_exception"; +static const char *const EHTimerDescription = "DWARF Exception Writer"; +static const char *const CodeViewLineTablesGroupName = "linetables"; +static const char *const CodeViewLineTablesGroupDescription = + "CodeView Line Tables"; 
STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -101,7 +108,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)), - LastMI(nullptr), LastFn(0), Counter(~0U) { + isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) { DD = nullptr; MMI = nullptr; LI = nullptr; @@ -143,7 +150,7 @@ const DataLayout &AsmPrinter::getDataLayout() const { } // Do not use the cached DataLayout because some client use it without a Module -// (llmv-dsymutil, llvm-dwarfdump). +// (llvm-dsymutil, llvm-dwarfdump). unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { @@ -155,17 +162,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { S.EmitInstruction(Inst, getSubtargetInfo()); } -StringRef AsmPrinter::getTargetTriple() const { - return TM.getTargetTriple().str(); -} - /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { - return OutStreamer->getCurrentSection().first; + return OutStreamer->getCurrentSectionOnly(); } - - void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); @@ -184,8 +185,6 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer->InitSections(false); - Mang = new Mangler(); - // Emit the version-min deplyment target directive if needed. // // FIXME: If we end up with a collection of these sorts of Darwin-specific @@ -194,7 +193,7 @@ bool AsmPrinter::doInitialization(Module &M) { // alternative is duplicated code in each of the target asm printers that // use the directive, where it would need the same conditionalization // anyway. - Triple TT(getTargetTriple()); + const Triple &TT = TM.getTargetTriple(); // If there is a version specified, Major will be non-zero. if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) { unsigned Major, Minor, Update; @@ -250,18 +249,43 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); - if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { + if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() || + TM.getTargetTriple().isWindowsItaniumEnvironment())) { Handlers.push_back(HandlerInfo(new CodeViewDebug(this), - DbgTimerName, - CodeViewLineTablesGroupName)); + DbgTimerName, DbgTimerDescription, + CodeViewLineTablesGroupName, + CodeViewLineTablesGroupDescription)); } if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); DD->beginModule(); - Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription, + DWARFGroupName, DWARFGroupDescription)); } } + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + isCFIMoveForDebugging = true; + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + break; + for (auto &F: M.getFunctionList()) { + // If the module contains any function with unwind data, + // .eh_frame has to be emitted. + // Ignore functions that won't get emitted. 
+ if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) { + isCFIMoveForDebugging = false; + break; + } + } + break; + default: + isCFIMoveForDebugging = false; + break; + } + EHStreamer *ES = nullptr; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: @@ -286,7 +310,8 @@ bool AsmPrinter::doInitialization(Module &M) { break; } if (ES) - Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName)); + Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription, + DWARFGroupName, DWARFGroupDescription)); return false; } @@ -340,11 +365,11 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV) const { - TM.getNameWithPrefix(Name, GV, *Mang); + TM.getNameWithPrefix(Name, GV, getObjFileLowering().getMangler()); } MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { - return TM.getSymbol(GV, *Mang); + return TM.getSymbol(GV); } /// EmitGlobalVariable - Emit the specified global variable to the .s file. @@ -407,7 +432,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned AlignLog = getGVAlignmentLog2(GV, DL); for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, + TimePassesIsEnabled); HI.Handler->setSymbolSize(GVSym, Size); } @@ -424,8 +451,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } // Determine to which section this global should be emitted. - MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM); // If we have a bss global going to a section that supports the // zerofill directive, do so here. @@ -483,7 +509,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = - OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); @@ -535,12 +561,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym), + OutStreamer->emitELFSize(EmittedInitSym, MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); } +/// Emit the directive and value for debug thread local expression +/// +/// \p Value - The value to emit. +/// \p Size - The size of the integer (in bytes) to emit. +void AsmPrinter::EmitDebugValue(const MCExpr *Value, + unsigned Size) const { + OutStreamer->EmitValue(Value, Size); +} + /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { @@ -550,8 +585,7 @@ void AsmPrinter::EmitFunctionHeader() { // Print the 'header' of function. const Function *F = MF->getFunction(); - OutStreamer->SwitchSection( - getObjFileLowering().SectionForGlobal(F, *Mang, TM)); + OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); @@ -598,7 +632,8 @@ void AsmPrinter::EmitFunctionHeader() { // Emit pre-function debug and/or EH information. 
for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginFunction(MF); } @@ -632,26 +667,26 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // Check for spills and reloads int FI; - const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; if (TII->isLoadFromStackSlotPostFE(MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) + if (MFI.isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) + if (MFI.isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -711,9 +746,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << V->getName(); const DIExpression *Expr = MI->getDebugExpression(); - if (Expr->isBitPiece()) - OS << " [bit_piece offset=" << Expr->getBitPieceOffset() - << " size=" << Expr->getBitPieceSize() << "]"; + auto Fragment = Expr->getFragmentInfo(); + if (Fragment) + OS << " [fragment offset=" << Fragment->OffsetInBits + << " size=" << Fragment->SizeInBits << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. @@ -721,21 +757,21 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; for (unsigned i = 0; i < Expr->getNumElements(); ++i) { - if (Deref) { + uint64_t Op = Expr->getElement(i); + if (Op == dwarf::DW_OP_LLVM_fragment) { + // There can't be any operands after this in a valid expression + break; + } else if (Deref) { // We currently don't support extra Offsets or derefs after the first // one. Bail out early instead of emitting an incorrect comment OS << " [complex expression]"; AP.OutStreamer->emitRawComment(OS.str()); return true; - } - uint64_t Op = Expr->getElement(i); - if (Op == dwarf::DW_OP_deref) { + } else if (Op == dwarf::DW_OP_deref) { Deref = true; continue; - } else if (Op == dwarf::DW_OP_bit_piece) { - // There can't be any operands after this in a valid expression - break; } + uint64_t ExtraOffset = Expr->getElement(i++); if (Op == dwarf::DW_OP_plus) Offset += ExtraOffset; @@ -756,7 +792,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // There is no good way to print long double. Convert a copy to // double. Ah well, it's only a comment. 
bool ignored; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored); OS << "(long double) " << APF.convertToDouble(); } @@ -819,8 +855,7 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; - const MachineModuleInfo &MMI = MF->getMMI(); - const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); + const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions(); unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; emitCFIInstruction(CFI); @@ -862,7 +897,8 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginInstruction(&MI); } @@ -906,7 +942,8 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endInstruction(); } @@ -944,8 +981,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { + if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() || + MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -959,12 +996,12 @@ void AsmPrinter::EmitFunctionBody() { const MCExpr *SizeExp = MCBinaryExpr::createSub( MCSymbolRefExpr::create(CurrentFnEnd, OutContext), MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext); - if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym)) - OutStreamer->emitELFSize(Sym, SizeExp); + OutStreamer->emitELFSize(CurrentFnSym, SizeExp); } for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->markFunctionEnd(); } @@ -973,10 +1010,10 @@ void AsmPrinter::EmitFunctionBody() { // Emit post-function debug and/or EH information. 
for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endFunction(MF); } - MMI->EndFunction(); OutStreamer->AddBlankLine(); } @@ -1100,8 +1137,7 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, (!BaseObject || BaseObject->hasPrivateLinkage())) { const DataLayout &DL = M.getDataLayout(); uint64_t Size = DL.getTypeAllocSize(GA->getValueType()); - OutStreamer->emitELFSize(cast<MCSymbolELF>(Name), - MCConstantExpr::create(Size, OutContext)); + OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext)); } } } @@ -1143,7 +1179,7 @@ bool AsmPrinter::doFinalization(Module &M) { SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM); + TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); @@ -1164,8 +1200,8 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, - TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endModule(); delete HI.Handler; } @@ -1246,7 +1282,6 @@ bool AsmPrinter::doFinalization(Module &M) { // after everything else has gone out. EmitEndOfAsmFile(M); - delete Mang; Mang = nullptr; MMI = nullptr; OutStreamer->Finish(); @@ -1269,8 +1304,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MMI->hasEHFunclets() || NeedsLocalForSize) { + if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() || + MF.hasEHFunclets() || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1392,7 +1427,7 @@ void AsmPrinter::EmitJumpTableInfo() { *F); if (JTInDiffSection) { // Drop it in the readonly section. 
- MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM); + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM); OutStreamer->SwitchSection(ReadOnlySection); } @@ -1536,12 +1571,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), /* isCtor */ true); - if (TM.getRelocationModel() == Reloc::Static && - MAI->hasStaticCtorDtorReferenceInStaticMode()) { - StringRef Sym(".constructors_used"); - OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), - MCSA_Reference); - } return true; } @@ -1549,12 +1578,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), /* isCtor */ false); - if (TM.getRelocationModel() == Reloc::Static && - MAI->hasStaticCtorDtorReferenceInStaticMode()) { - StringRef Sym(".destructors_used"); - OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), - MCSA_Reference); - } return true; } @@ -1699,7 +1722,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer->EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label, Offset); + if (Size > 4) + OutStreamer->EmitZeros(Size - 4); return; } @@ -1764,7 +1789,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) + if (Constant *C = ConstantFoldConstant(CE, getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1796,7 +1821,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // expression properly. This is important for differences between // blockaddress labels. Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. + LLVM_FALLTHROUGH; case Instruction::BitCast: return lowerConstant(CE->getOperand(0)); @@ -1843,8 +1868,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { APInt RHSOffset; if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset, getDataLayout())) { - const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference( - LHSGV, RHSGV, *Mang, TM); + const MCExpr *RelocExpr = + getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM); if (!RelocExpr) RelocExpr = MCBinaryExpr::createSub( MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx), @@ -2299,7 +2324,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. 
- Constant *New = ConstantFoldConstantExpression(CE, DL); + Constant *New = ConstantFoldConstant(CE, DL); if (New && New != CE) return emitGlobalConstantImpl(DL, New, AP); } @@ -2385,8 +2410,7 @@ MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, StringRef Suffix) const { - return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang, - TM); + return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, TM); } /// Return the MCSymbol for the specified ExternalSymbol. @@ -2599,12 +2623,12 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { if (GCPI != GCMap.end()) return GCPI->second.get(); - const char *Name = S.getName().c_str(); + auto Name = S.getName(); for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) - if (strcmp(Name, I->getName()) == 0) { + if (Name == I->getName()) { std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate(); GMP->S = &S; auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); @@ -2618,3 +2642,76 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { AsmPrinterHandler::~AsmPrinterHandler() {} void AsmPrinterHandler::markFunctionEnd() {} + +// In the binary's "xray_instr_map" section, an array of these function entries +// describes each instrumentation point. When XRay patches your code, the index +// into this table will be given to your handler as a patch point identifier. +void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCSymbol *CurrentFnSym) const { + Out->EmitSymbolValue(Sled, Bytes); + Out->EmitSymbolValue(CurrentFnSym, Bytes); + auto Kind8 = static_cast<uint8_t>(Kind); + Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); + Out->EmitBytes( + StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1)); + Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries +} + +void AsmPrinter::emitXRayTable() { + if (Sleds.empty()) + return; + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + auto Fn = MF->getFunction(); + MCSection *Section = nullptr; + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + if (Fn->hasComdat()) { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); + } else { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + } + } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { + Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since this function is always called just + // before the function's end, we assume that this is happening after + // the last return instruction. 
+ + auto WordSizeBytes = TM.getPointerSize(); + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); + OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); + for (const auto &Sled : Sleds) + Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + + OutStreamer->SwitchSection(PrevSection); + Sleds.clear(); +} + +void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, + SledKind Kind) { + auto Fn = MI.getParent()->getParent()->getFunction(); + auto Attr = Fn->getFnAttribute("function-instrument"); + bool AlwaysInstrument = + Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; + Sleds.emplace_back( + XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn }); +} + +uint16_t AsmPrinter::getDwarfVersion() const { + return OutStreamer->getContext().getDwarfVersion(); +} + +void AsmPrinter::setDwarfVersion(uint16_t Version) { + OutStreamer->getContext().setDwarfVersion(Version); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 60f40d0..0185c38 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -138,8 +138,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, - *OutStreamer); + TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer); OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); @@ -150,7 +149,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (!ForceOffset) { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { - OutStreamer->EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); return; } @@ -175,36 +174,6 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { EmitInt32(S.getOffset()); } -/// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, - const MachineLocation &MLoc) const { - DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer); - const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); - int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); - if (Reg < 0) { - // We assume that pointers are always in an addressable register. - if (MLoc.isIndirect()) - // FIXME: We have no reasonable way of handling errors in here. The - // caller might be in the middle of a dwarf expression. We should - // probably assert that Reg >= 0 once debug info generation is more - // mature. - return Expr.EmitOp(dwarf::DW_OP_nop, - "nop (could not find a dwarf register number)"); - - // Attempt to find a valid super- or sub-register. 
- if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(), - MLoc.getReg())) - Expr.EmitOp(dwarf::DW_OP_nop, - "nop (could not find a dwarf register number)"); - return; - } - - if (MLoc.isIndirect()) - Expr.AddRegIndirect(Reg, MLoc.getOffset()); - else - Expr.AddReg(Reg); -} - //===----------------------------------------------------------------------===// // Dwarf Lowering Routines //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 2ce6c18..57864e4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -100,6 +100,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, } SourceMgr SrcMgr; + SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); + SrcMgrDiagInfo DiagInfo; // If the current LLVMContext has an inline asm handler, set it in SourceMgr. @@ -193,6 +195,23 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } if (Done) break; + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (LastEmitted[0] == '{' && LastEmitted[1] == ':') { + LastEmitted += 2; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (!StrEnd) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + AP->PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + const char *IDStart = LastEmitted; const char *IDEnd = IDStart; while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index ebf80de..8344051 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -13,17 +13,20 @@ #include "CodeViewDebug.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/DebugInfo/CodeView/ByteStream.h" +#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDumper.h" +#include "llvm/DebugInfo/CodeView/TypeDatabase.h" +#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" #include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" @@ -35,9 +38,11 @@ using namespace llvm; using namespace llvm::codeview; +using namespace llvm::msf; CodeViewDebug::CodeViewDebug(AsmPrinter *AP) - : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) { + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(), + TypeTable(Allocator), CurFn(nullptr) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. 
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || @@ -108,8 +113,9 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { if (Insertion.second) { // We have to compute the full filepath and emit a .cv_file directive. StringRef FullPath = getFullFilepath(F); - NextId = OS.EmitCVFileDirective(NextId, FullPath); - assert(NextId == FileIdMap.size() && ".cv_file directive failed"); + bool Success = OS.EmitCVFileDirective(NextId, FullPath); + (void)Success; + assert(Success && ".cv_file directive failed"); } return Insertion.first->second; } @@ -120,7 +126,16 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt, auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()}); InlineSite *Site = &SiteInsertion.first->second; if (SiteInsertion.second) { + unsigned ParentFuncId = CurFn->FuncId; + if (const DILocation *OuterIA = InlinedAt->getInlinedAt()) + ParentFuncId = + getInlineSite(OuterIA, InlinedAt->getScope()->getSubprogram()) + .SiteFuncId; + Site->SiteFuncId = NextFuncId++; + OS.EmitCVInlineSiteIdDirective( + Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()), + InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc()); Site->Inlinee = Inlinee; InlinedSubprograms.insert(Inlinee); getFuncIdForSubprogram(Inlinee); @@ -208,8 +223,8 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { // Build the fully qualified name of the scope. std::string ScopeName = getFullyQualifiedName(Scope); - TypeIndex TI = - TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName)); + StringIdRecord SID(TypeIndex(), ScopeName); + auto TI = TypeTable.writeKnownType(SID); return recordTypeIndexForDINode(Scope, TI); } @@ -234,12 +249,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { TypeIndex ClassType = getTypeIndex(Class); MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class), DisplayName); - TI = TypeTable.writeMemberFuncId(MFuncId); + TI = TypeTable.writeKnownType(MFuncId); } else { // Otherwise, this must be a free function. TypeIndex ParentScope = getScopeIndex(Scope); FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName); - TI = TypeTable.writeFuncId(FuncId); + TI = TypeTable.writeKnownType(FuncId); } return recordTypeIndexForDINode(SP, TI); @@ -353,8 +368,8 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, } OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(), - /*PrologueEnd=*/false, - /*IsStmt=*/false, DL->getFilename()); + /*PrologueEnd=*/false, /*IsStmt=*/false, + DL->getFilename(), SMLoc()); } void CodeViewDebug::emitCodeViewMagicVersion() { @@ -377,6 +392,11 @@ void CodeViewDebug::endModule() { // Use the generic .debug$S section, and make a subsection for all the inlined // subprograms. switchToDebugSectionForSymbol(nullptr); + + MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols); + emitCompilerInformation(); + endCVSubsection(CompilerInfo); + emitInlineeLinesSubsection(); // Emit per-function debug information. @@ -418,10 +438,13 @@ void CodeViewDebug::endModule() { } static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { - // Microsoft's linker seems to have trouble with symbol names longer than - // 0xffd8 bytes. - S = S.substr(0, 0xffd8); - SmallString<32> NullTerminatedString(S); + // The maximum CV record length is 0xFF00. Most of the strings we emit appear + // after a fixed length portion of the record. 
The fixed length portion should + // always be less than 0xF00 (3840) bytes, so truncate the string so that the + // overall record size is less than the maximum allowed. + unsigned MaxFixedRecordLength = 0xF00; + SmallString<32> NullTerminatedString( + S.take_front(MaxRecordLength - MaxFixedRecordLength - 1)); NullTerminatedString.push_back('\0'); OS.EmitBytes(NullTerminatedString); } @@ -446,48 +469,175 @@ void CodeViewDebug::emitTypeInformation() { CommentPrefix += ' '; } - CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false); - TypeTable.ForEachRecord( - [&](TypeIndex Index, StringRef Record) { - if (OS.isVerboseAsm()) { - // Emit a block comment describing the type record for readability. - SmallString<512> CommentBlock; - raw_svector_ostream CommentOS(CommentBlock); - ScopedPrinter SP(CommentOS); - SP.setPrefix(CommentPrefix); - CVTD.setPrinter(&SP); - Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()}); - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } - // emitRawComment will insert its own tab and comment string before - // the first line, so strip off our first one. It also prints its own - // newline. - OS.emitRawComment( - CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); - } else { + TypeDatabase TypeDB; + CVTypeDumper CVTD(TypeDB); + TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) { + if (OS.isVerboseAsm()) { + // Emit a block comment describing the type record for readability. + SmallString<512> CommentBlock; + raw_svector_ostream CommentOS(CommentBlock); + ScopedPrinter SP(CommentOS); + SP.setPrefix(CommentPrefix); + TypeDumpVisitor TDV(TypeDB, &SP, false); + Error E = CVTD.dump(Record, TDV); + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } + // emitRawComment will insert its own tab and comment string before + // the first line, so strip off our first one. It also prints its own + // newline. + OS.emitRawComment( + CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); + } else { #ifndef NDEBUG - // Assert that the type data is valid even if we aren't dumping - // comments. The MSVC linker doesn't do much type record validation, - // so the first link of an invalid type record can succeed while - // subsequent links will fail with LNK1285. - ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()}); - CVTypeArray Types; - StreamReader Reader(Stream); - Error E = Reader.readArray(Types, Reader.getLength()); - if (!E) { - TypeVisitorCallbacks C; - E = CVTypeVisitor(C).visitTypeStream(Types); - } - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } + // Assert that the type data is valid even if we aren't dumping + // comments. The MSVC linker doesn't do much type record validation, + // so the first link of an invalid type record can succeed while + // subsequent links will fail with LNK1285. 
+ ByteStream Stream(Record); + CVTypeArray Types; + StreamReader Reader(Stream); + Error E = Reader.readArray(Types, Reader.getLength()); + if (!E) { + TypeVisitorCallbacks C; + E = CVTypeVisitor(C).visitTypeStream(Types); + } + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } #endif - } - OS.EmitBinaryData(Record); - }); + } + StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size()); + OS.EmitBinaryData(S); + }); +} + +namespace { + +static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { + switch (DWLang) { + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_ObjC: + return SourceLanguage::C; + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_C_plus_plus_14: + return SourceLanguage::Cpp; + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + return SourceLanguage::Fortran; + case dwarf::DW_LANG_Pascal83: + return SourceLanguage::Pascal; + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + return SourceLanguage::Cobol; + case dwarf::DW_LANG_Java: + return SourceLanguage::Java; + default: + // There's no CodeView representation for this language, and CV doesn't + // have an "unknown" option for the language field, so we'll use MASM, + // as it's very low level. + return SourceLanguage::Masm; + } +} + +struct Version { + int Part[4]; +}; + +// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out +// the version number. +static Version parseVersion(StringRef Name) { + Version V = {{0}}; + int N = 0; + for (const char C : Name) { + if (isdigit(C)) { + V.Part[N] *= 10; + V.Part[N] += C - '0'; + } else if (C == '.') { + ++N; + if (N >= 4) + return V; + } else if (N > 0) + return V; + } + return V; +} + +static CPUType mapArchToCVCPUType(Triple::ArchType Type) { + switch (Type) { + case Triple::ArchType::x86: + return CPUType::Pentium3; + case Triple::ArchType::x86_64: + return CPUType::X64; + case Triple::ArchType::thumb: + return CPUType::Thumb; + default: + report_fatal_error("target architecture doesn't map to a CodeView " + "CPUType"); + } +} + +} // anonymous namespace + +void CodeViewDebug::emitCompilerInformation() { + MCContext &Context = MMI->getContext(); + MCSymbol *CompilerBegin = Context.createTempSymbol(), + *CompilerEnd = Context.createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2); + OS.EmitLabel(CompilerBegin); + OS.AddComment("Record kind: S_COMPILE3"); + OS.EmitIntValue(SymbolKind::S_COMPILE3, 2); + uint32_t Flags = 0; + + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast<DICompileUnit>(Node); + + // The low byte of the flags indicates the source language. + Flags = MapDWLangToCVLang(CU->getSourceLanguage()); + // TODO: Figure out which other flags need to be set. 
+ + OS.AddComment("Flags and language"); + OS.EmitIntValue(Flags, 4); + + OS.AddComment("CPUType"); + CPUType CPU = + mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch()); + OS.EmitIntValue(static_cast<uint64_t>(CPU), 2); + + StringRef CompilerVersion = CU->getProducer(); + Version FrontVer = parseVersion(CompilerVersion); + OS.AddComment("Frontend version"); + for (int N = 0; N < 4; ++N) + OS.EmitIntValue(FrontVer.Part[N], 2); + + // Some Microsoft tools, like Binscope, expect a backend version number of at + // least 8.something, so we'll coerce the LLVM version into a form that + // guarantees it'll be big enough without really lying about the version. + int Major = 1000 * LLVM_VERSION_MAJOR + + 10 * LLVM_VERSION_MINOR + + LLVM_VERSION_PATCH; + // Clamp it for builds that use unusually large version numbers. + Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max()); + Version BackVer = {{ Major, 0, 0, 0 }}; + OS.AddComment("Backend version"); + for (int N = 0; N < 4; ++N) + OS.EmitIntValue(BackVer.Part[N], 2); + + OS.AddComment("Null-terminated compiler version string"); + emitNullTerminatedSymbolName(OS, CompilerVersion); + + OS.EmitLabel(CompilerEnd); } void CodeViewDebug::emitInlineeLinesSubsection() { @@ -525,17 +675,6 @@ void CodeViewDebug::emitInlineeLinesSubsection() { endCVSubsection(InlineEnd); } -void CodeViewDebug::collectInlineSiteChildren( - SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI, - const InlineSite &Site) { - for (const DILocation *ChildSiteLoc : Site.ChildSites) { - auto I = FI.InlineSites.find(ChildSiteLoc); - const InlineSite &ChildSite = I->second; - Children.push_back(ChildSite.SiteFuncId); - collectInlineSiteChildren(Children, FI, ChildSite); - } -} - void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site) { @@ -561,11 +700,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, unsigned FileId = maybeRecordFile(Site.Inlinee->getFile()); unsigned StartLineNum = Site.Inlinee->getLine(); - SmallVector<unsigned, 3> SecondaryFuncIds; - collectInlineSiteChildren(SecondaryFuncIds, FI, Site); OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, - FI.Begin, FI.End, SecondaryFuncIds); + FI.Begin, FI.End); OS.EmitLabel(InlineEnd); @@ -641,13 +778,13 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2); OS.EmitLabel(ProcRecordBegin); - if (GV->hasLocalLinkage()) { - OS.AddComment("Record kind: S_LPROC32_ID"); - OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2); - } else { - OS.AddComment("Record kind: S_GPROC32_ID"); - OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2); - } + if (GV->hasLocalLinkage()) { + OS.AddComment("Record kind: S_LPROC32_ID"); + OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2); + } else { + OS.AddComment("Record kind: S_GPROC32_ID"); + OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2); + } // These fields are filled in by tools like CVPACK which run after the fact. 
OS.AddComment("PtrParent"); @@ -667,7 +804,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Function type index"); OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4); OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Fn); + OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Function section index"); OS.EmitCOFFSectionIndex(Fn); OS.AddComment("Flags"); @@ -711,29 +848,33 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { DR.InMemory = -1; DR.DataOffset = Offset; assert(DR.DataOffset == Offset && "truncation"); + DR.IsSubfield = 0; DR.StructOffset = 0; DR.CVRegister = CVRegister; return DR; } CodeViewDebug::LocalVarDefRange -CodeViewDebug::createDefRangeReg(uint16_t CVRegister) { +CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory, + int Offset, bool IsSubfield, + uint16_t StructOffset) { LocalVarDefRange DR; - DR.InMemory = 0; - DR.DataOffset = 0; - DR.StructOffset = 0; + DR.InMemory = InMemory; + DR.DataOffset = Offset; + DR.IsSubfield = IsSubfield; + DR.StructOffset = StructOffset; DR.CVRegister = CVRegister; return DR; } -void CodeViewDebug::collectVariableInfoFromMMITable( +void CodeViewDebug::collectVariableInfoFromMFTable( DenseSet<InlinedVariable> &Processed) { - const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget(); + const MachineFunction &MF = *Asm->MF; + const TargetSubtargetInfo &TSI = MF.getSubtarget(); const TargetFrameLowering *TFI = TSI.getFrameLowering(); const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); - for (const MachineModuleInfo::VariableDbgInfo &VI : - MMI->getVariableDbgInfo()) { + for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) { if (!VI.Var) continue; assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && @@ -770,7 +911,7 @@ void CodeViewDebug::collectVariableInfoFromMMITable( void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { DenseSet<InlinedVariable> Processed; // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(Processed); + collectVariableInfoFromMFTable(Processed); const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); @@ -802,10 +943,17 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { const MachineInstr *DVInst = Range.first; assert(DVInst->isDebugValue() && "Invalid History entry"); const DIExpression *DIExpr = DVInst->getDebugExpression(); - - // Bail if there is a complex DWARF expression for now. - if (DIExpr && DIExpr->getNumElements() > 0) - continue; + bool IsSubfield = false; + unsigned StructOffset = 0; + + // Handle fragments. + auto Fragment = DIExpr->getFragmentInfo(); + if (DIExpr && Fragment) { + IsSubfield = true; + StructOffset = Fragment->OffsetInBits / 8; + } else if (DIExpr && DIExpr->getNumElements() > 0) { + continue; // Ignore unrecognized exprs. + } // Bail if operand 0 is not a valid register. This means the variable is a // simple constant, or is described by a complex expression. @@ -817,19 +965,20 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { continue; // Handle the two cases we can handle: indirect in memory and in register. - bool IsIndirect = DVInst->getOperand(1).isImm(); - unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg()); + unsigned CVReg = TRI->getCodeViewRegNum(Reg); + bool InMemory = DVInst->getOperand(1).isImm(); + int Offset = InMemory ? 
DVInst->getOperand(1).getImm() : 0; { - LocalVarDefRange DefRange; - if (IsIndirect) { - int64_t Offset = DVInst->getOperand(1).getImm(); - DefRange = createDefRangeMem(CVReg, Offset); - } else { - DefRange = createDefRangeReg(CVReg); - } + LocalVarDefRange DR; + DR.CVRegister = CVReg; + DR.InMemory = InMemory; + DR.DataOffset = Offset; + DR.IsSubfield = IsSubfield; + DR.StructOffset = StructOffset; + if (Var.DefRanges.empty() || - Var.DefRanges.back().isDifferentLocation(DefRange)) { - Var.DefRanges.emplace_back(std::move(DefRange)); + Var.DefRanges.back().isDifferentLocation(DR)) { + Var.DefRanges.emplace_back(std::move(DR)); } } @@ -837,8 +986,14 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); if (!End) { - if (std::next(I) != E) - End = getLabelBeforeInsn(std::next(I)->first); + // This range is valid until the next overlapping bitpiece. In the + // common case, ranges will not be bitpieces, so they will overlap. + auto J = std::next(I); + while (J != E && + !fragmentsOverlap(DIExpr, J->first->getDebugExpression())) + ++J; + if (J != E) + End = getLabelBeforeInsn(J->first); else End = Asm->getFunctionEnd(); } @@ -873,6 +1028,8 @@ void CodeViewDebug::beginFunction(const MachineFunction *MF) { CurFn->FuncId = NextFuncId++; CurFn->Begin = Asm->getFunctionBegin(); + OS.EmitCVFuncIdDirective(CurFn->FuncId); + // Find the end of the function prolog. First known non-DBG_VALUE and // non-frame setup location marks the beginning of the function body. // FIXME: is there a simpler a way to do this? Can we just search @@ -933,6 +1090,9 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { case dwarf::DW_TAG_base_type: return lowerTypeBasic(cast<DIBasicType>(Ty)); case dwarf::DW_TAG_pointer_type: + if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type") + return lowerTypeVFTableShape(cast<DIDerivedType>(Ty)); + LLVM_FALLTHROUGH; case dwarf::DW_TAG_reference_type: case dwarf::DW_TAG_rvalue_reference_type: return lowerTypePointer(cast<DIDerivedType>(Ty)); @@ -940,6 +1100,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypeMemberPointer(cast<DIDerivedType>(Ty)); case dwarf::DW_TAG_const_type: case dwarf::DW_TAG_volatile_type: + // TODO: add support for DW_TAG_atomic_type here return lowerTypeModifier(cast<DIDerivedType>(Ty)); case dwarf::DW_TAG_subroutine_type: if (ClassTy) { @@ -989,20 +1150,25 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8; - bool UndefinedSubrange = false; - // FIXME: - // There is a bug in the front-end where an array of a structure, which was - // declared as incomplete structure first, ends up not getting a size assigned - // to it. (PR28303) + // We want to assert that the element type multiplied by the array lengths + // match the size of the overall array. However, if we don't have complete + // type information for the base type, we can't make this assertion. This + // happens if limited debug info is enabled in this case: + // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); }; + // VTableOptzn array[3]; + // The DICompositeType of VTableOptzn will have size zero, and the array will + // have size 3 * sizeof(void*), and we should avoid asserting. 
+ // + // There is a related bug in the front-end where an array of a structure, + // which was declared as incomplete structure first, ends up not getting a + // size assigned to it. (PR28303) // Example: // struct A(*p)[3]; // struct A { int f; } a[3]; - // - // This needs to be fixed in the front-end, but in the meantime we don't want - // to trigger an assertion because of this. - if (Ty->getSizeInBits() == 0) { - UndefinedSubrange = true; + bool PartiallyIncomplete = false; + if (Ty->getSizeInBits() == 0 || ElementSize == 0) { + PartiallyIncomplete = true; } // Add subranges to array type. @@ -1021,18 +1187,24 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU. if (Count == -1) { Count = 1; - UndefinedSubrange = true; + PartiallyIncomplete = true; } - StringRef Name = (i == 0) ? Ty->getName() : ""; // Update the element size and element type index for subsequent subranges. ElementSize *= Count; - ElementTypeIndex = TypeTable.writeArray( - ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name)); + + // If this is the outermost array, use the size from the array. It will be + // more accurate if PartiallyIncomplete is true. + uint64_t ArraySize = + (i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize; + + StringRef Name = (i == 0) ? Ty->getName() : ""; + ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name); + ElementTypeIndex = TypeTable.writeKnownType(AR); } - (void)UndefinedSubrange; - assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8)); + (void)PartiallyIncomplete; + assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8)); return ElementTypeIndex; } @@ -1080,20 +1252,20 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { break; case dwarf::DW_ATE_signed: switch (ByteSize) { - case 1: STK = SimpleTypeKind::SByte; break; - case 2: STK = SimpleTypeKind::Int16Short; break; - case 4: STK = SimpleTypeKind::Int32; break; - case 8: STK = SimpleTypeKind::Int64Quad; break; - case 16: STK = SimpleTypeKind::Int128Oct; break; + case 1: STK = SimpleTypeKind::SignedCharacter; break; + case 2: STK = SimpleTypeKind::Int16Short; break; + case 4: STK = SimpleTypeKind::Int32; break; + case 8: STK = SimpleTypeKind::Int64Quad; break; + case 16: STK = SimpleTypeKind::Int128Oct; break; } break; case dwarf::DW_ATE_unsigned: switch (ByteSize) { - case 1: STK = SimpleTypeKind::Byte; break; - case 2: STK = SimpleTypeKind::UInt16Short; break; - case 4: STK = SimpleTypeKind::UInt32; break; - case 8: STK = SimpleTypeKind::UInt64Quad; break; - case 16: STK = SimpleTypeKind::UInt128Oct; break; + case 1: STK = SimpleTypeKind::UnsignedCharacter; break; + case 2: STK = SimpleTypeKind::UInt16Short; break; + case 4: STK = SimpleTypeKind::UInt32; break; + case 8: STK = SimpleTypeKind::UInt64Quad; break; + case 16: STK = SimpleTypeKind::UInt128Oct; break; } break; case dwarf::DW_ATE_UTF: @@ -1133,13 +1305,6 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType()); - // While processing the type being pointed to it is possible we already - // created this pointer type. If so, we check here and return the existing - // pointer type. 
- auto I = TypeIndices.find({Ty, nullptr}); - if (I != TypeIndices.end()) - return I->second; - // Pointers to simple types can use SimpleTypeMode, rather than having a // dedicated pointer type record. if (PointeeTI.isSimple() && @@ -1171,7 +1336,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { // do. PointerOptions PO = PointerOptions::None; PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); - return TypeTable.writePointer(PR); + return TypeTable.writeKnownType(PR); } static PointerToMemberRepresentation @@ -1222,7 +1387,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { MemberPointerInfo MPI( ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags())); PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI); - return TypeTable.writePointer(PR); + return TypeTable.writeKnownType(PR); } /// Given a DWARF calling convention, get the CodeView equivalent. If we don't @@ -1244,7 +1409,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { bool IsModifier = true; const DIType *BaseTy = Ty; while (IsModifier && BaseTy) { - // FIXME: Need to add DWARF tag for __unaligned. + // FIXME: Need to add DWARF tags for __unaligned and _Atomic switch (BaseTy->getTag()) { case dwarf::DW_TAG_const_type: Mods |= ModifierOptions::Const; @@ -1260,16 +1425,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve(); } TypeIndex ModifiedTI = getTypeIndex(BaseTy); - - // While processing the type being pointed to, it is possible we already - // created this modifier type. If so, we check here and return the existing - // modifier type. - auto I = TypeIndices.find({Ty, nullptr}); - if (I != TypeIndices.end()) - return I->second; - ModifierRecord MR(ModifiedTI, Mods); - return TypeTable.writeModifier(MR); + return TypeTable.writeKnownType(MR); } TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { @@ -1286,13 +1443,13 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None, ArgTypeIndices.size(), ArgListIndex); - return TypeTable.writeProcedure(Procedure); + return TypeTable.writeKnownType(Procedure); } TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, @@ -1319,20 +1476,29 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); // TODO: Need to use the correct values for: // FunctionOptions // ThisPointerAdjustment. 
- TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord( - ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None, - ArgTypeIndices.size(), ArgListIndex, ThisAdjustment)); + MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, + FunctionOptions::None, ArgTypeIndices.size(), + ArgListIndex, ThisAdjustment); + TypeIndex TI = TypeTable.writeKnownType(MFR); return TI; } +TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { + unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize()); + SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near); + + VFTableShapeRecord VFTSR(Slots); + return TypeTable.writeKnownType(VFTSR); +} + static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) { switch (Flags & DINode::FlagAccessibility) { case DINode::FlagPrivate: return MemberAccess::Private; @@ -1420,25 +1586,28 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { if (Ty->isForwardDecl()) { CO |= ClassOptions::ForwardReference; } else { - FieldListRecordBuilder Fields; + FieldListRecordBuilder FLRB(TypeTable); + + FLRB.begin(); for (const DINode *Element : Ty->getElements()) { // We assume that the frontend provides all members in source declaration // order, which is what MSVC does. if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) { - Fields.writeEnumerator(EnumeratorRecord( - MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()), - Enumerator->getName())); + EnumeratorRecord ER(MemberAccess::Public, + APSInt::getUnsigned(Enumerator->getValue()), + Enumerator->getName()); + FLRB.writeMemberType(ER); EnumeratorCount++; } } - FTI = TypeTable.writeFieldList(Fields); + FTI = FLRB.end(); } std::string FullName = getFullyQualifiedName(Ty); - return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName, - Ty->getIdentifier(), - getTypeIndex(Ty->getBaseType()))); + EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(), + getTypeIndex(Ty->getBaseType())); + return TypeTable.writeKnownType(ER); } //===----------------------------------------------------------------------===// @@ -1465,6 +1634,8 @@ struct llvm::ClassInfo { // Direct overloaded methods gathered by name. MethodsMap Methods; + TypeIndex VShapeTI; + std::vector<const DICompositeType *> NestedClasses; }; @@ -1513,11 +1684,13 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) { collectMemberInfo(Info, DDTy); } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) { Info.Inheritance.push_back(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type && + DDTy->getName() == "__vtbl_ptr_type") { + Info.VShapeTI = getTypeIndex(DDTy); } else if (DDTy->getTag() == dwarf::DW_TAG_friend) { // Ignore friend members. It appears that MSVC emitted info about // friends in the past, but modern versions do not. } - // FIXME: Get Clang to emit function virtual table here and handle it. 
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { Info.NestedClasses.push_back(Composite); } @@ -1533,9 +1706,9 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { ClassOptions CO = ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); - TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord( - Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(), - TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier())); + ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0, + FullName, Ty->getIdentifier()); + TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1559,14 +1732,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { uint64_t SizeInBytes = Ty->getSizeInBits() / 8; - TypeIndex ClassTI = TypeTable.writeClass(ClassRecord( - Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI, - TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier())); + ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI, + SizeInBytes, FullName, Ty->getIdentifier()); + TypeIndex ClassTI = TypeTable.writeKnownType(CR); - TypeTable.writeUdtSourceLine(UdtSourceLineRecord( - ClassTI, TypeTable.writeStringId(StringIdRecord( - TypeIndex(0x0), getFullFilepath(Ty->getFile()))), - Ty->getLine())); + StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); + TypeIndex SIDI = TypeTable.writeKnownType(SIDR); + UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); + TypeTable.writeKnownType(USLR); addToUDTs(Ty, ClassTI); @@ -1577,9 +1750,8 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { ClassOptions CO = ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); - TypeIndex FwdDeclTI = - TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0, - FullName, Ty->getIdentifier())); + UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier()); + TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1599,14 +1771,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { uint64_t SizeInBytes = Ty->getSizeInBits() / 8; std::string FullName = getFullyQualifiedName(Ty); - TypeIndex UnionTI = TypeTable.writeUnion( - UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName, - Ty->getIdentifier())); + UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName, + Ty->getIdentifier()); + TypeIndex UnionTI = TypeTable.writeKnownType(UR); - TypeTable.writeUdtSourceLine(UdtSourceLineRecord( - UnionTI, TypeTable.writeStringId(StringIdRecord( - TypeIndex(0x0), getFullFilepath(Ty->getFile()))), - Ty->getLine())); + StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); + TypeIndex SIRI = TypeTable.writeKnownType(SIR); + UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine()); + TypeTable.writeKnownType(USLR); addToUDTs(Ty, UnionTI); @@ -1621,7 +1793,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { // list record. unsigned MemberCount = 0; ClassInfo Info = collectClassInfo(Ty); - FieldListRecordBuilder Fields; + FieldListRecordBuilder FLBR(TypeTable); + FLBR.begin(); // Create base classes. 
for (const DIDerivedType *I : Info.Inheritance) { @@ -1631,16 +1804,22 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { unsigned VBPtrOffset = 0; // FIXME: Despite the accessor name, the offset is really in bytes. unsigned VBTableIndex = I->getOffsetInBits() / 4; - Fields.writeVirtualBaseClass(VirtualBaseClassRecord( - translateAccessFlags(Ty->getTag(), I->getFlags()), + auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase + ? TypeRecordKind::IndirectVirtualBaseClass + : TypeRecordKind::VirtualBaseClass; + VirtualBaseClassRecord VBCR( + RecordKind, translateAccessFlags(Ty->getTag(), I->getFlags()), getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset, - VBTableIndex)); + VBTableIndex); + + FLBR.writeMemberType(VBCR); } else { assert(I->getOffsetInBits() % 8 == 0 && "bases must be on byte boundaries"); - Fields.writeBaseClass(BaseClassRecord( - translateAccessFlags(Ty->getTag(), I->getFlags()), - getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8)); + BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()), + getTypeIndex(I->getBaseType()), + I->getOffsetInBits() / 8); + FLBR.writeMemberType(BCR); } } @@ -1653,8 +1832,17 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { translateAccessFlags(Ty->getTag(), Member->getFlags()); if (Member->isStaticMember()) { - Fields.writeStaticDataMember( - StaticDataMemberRecord(Access, MemberBaseType, MemberName)); + StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName); + FLBR.writeMemberType(SDMR); + MemberCount++; + continue; + } + + // Virtual function pointer member. + if ((Member->getFlags() & DINode::FlagArtificial) && + Member->getName().startswith("_vptr$")) { + VFPtrRecord VFPR(getTypeIndex(Member->getBaseType())); + FLBR.writeMemberType(VFPR); MemberCount++; continue; } @@ -1669,12 +1857,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset; } StartBitOffset -= MemberOffsetInBits; - MemberBaseType = TypeTable.writeBitField(BitFieldRecord( - MemberBaseType, Member->getSizeInBits(), StartBitOffset)); + BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(), + StartBitOffset); + MemberBaseType = TypeTable.writeKnownType(BFR); } uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8; - Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType, - MemberOffsetInBytes, MemberName)); + DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes, + MemberName); + FLBR.writeMemberType(DMR); MemberCount++; } @@ -1691,33 +1881,32 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { if (Introduced) VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes(); - Methods.push_back( - OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced), - translateMethodOptionFlags(SP), - translateAccessFlags(Ty->getTag(), SP->getFlags()), - VFTableOffset, Name)); + Methods.push_back(OneMethodRecord( + MethodType, translateAccessFlags(Ty->getTag(), SP->getFlags()), + translateMethodKindFlags(SP, Introduced), + translateMethodOptionFlags(SP), VFTableOffset, Name)); MemberCount++; } assert(Methods.size() > 0 && "Empty methods map entry"); if (Methods.size() == 1) - Fields.writeOneMethod(Methods[0]); + FLBR.writeMemberType(Methods[0]); else { - TypeIndex MethodList = - TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods)); - Fields.writeOverloadedMethod( - OverloadedMethodRecord(Methods.size(), MethodList, Name)); 
+ MethodOverloadListRecord MOLR(Methods); + TypeIndex MethodList = TypeTable.writeKnownType(MOLR); + OverloadedMethodRecord OMR(Methods.size(), MethodList, Name); + FLBR.writeMemberType(OMR); } } // Create nested classes. for (const DICompositeType *Nested : Info.NestedClasses) { NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName()); - Fields.writeNestedType(R); + FLBR.writeMemberType(R); MemberCount++; } - TypeIndex FieldTI = TypeTable.writeFieldList(Fields); - return std::make_tuple(FieldTI, TypeIndex(), MemberCount, + TypeIndex FieldTI = FLBR.end(); + return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, !Info.NestedClasses.empty()); } @@ -1725,7 +1914,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() { if (!VBPType.getIndex()) { // Make a 'const int *' type. ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const); - TypeIndex ModifiedTI = TypeTable.writeModifier(MR); + TypeIndex ModifiedTI = TypeTable.writeKnownType(MR); PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 : PointerKind::Near32; @@ -1733,7 +1922,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() { PointerOptions PO = PointerOptions::None; PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes()); - VBPType = TypeTable.writePointer(PR); + VBPType = TypeTable.writeKnownType(PR); } return VBPType; @@ -1880,30 +2069,47 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { SmallString<20> BytePrefix; for (const LocalVarDefRange &DefRange : Var.DefRanges) { BytePrefix.clear(); - // FIXME: Handle bitpieces. - if (DefRange.StructOffset != 0) - continue; - if (DefRange.InMemory) { - DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0, - 0, 0, ArrayRef<LocalVariableAddrGap>()); + uint16_t RegRelFlags = 0; + if (DefRange.IsSubfield) { + RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag | + (DefRange.StructOffset + << DefRangeRegisterRelSym::OffsetInParentShift); + } + DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL); + Sym.Hdr.Register = DefRange.CVRegister; + Sym.Hdr.Flags = RegRelFlags; + Sym.Hdr.BasePointerOffset = DefRange.DataOffset; ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL); BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); BytePrefix += - StringRef(reinterpret_cast<const char *>(&Sym.Header), - sizeof(Sym.Header) - sizeof(LocalVariableAddrRange)); + StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr)); } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); - // Unclear what matters here. - DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0, - ArrayRef<LocalVariableAddrGap>()); - ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER); - BytePrefix += - StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); - BytePrefix += - StringRef(reinterpret_cast<const char *>(&Sym.Header), - sizeof(Sym.Header) - sizeof(LocalVariableAddrRange)); + if (DefRange.IsSubfield) { + // Unclear what matters here. + DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER); + Sym.Hdr.Register = DefRange.CVRegister; + Sym.Hdr.MayHaveNoName = 0; + Sym.Hdr.OffsetInParent = DefRange.StructOffset; + + ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER); + BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), + sizeof(SymKind)); + BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr), + sizeof(Sym.Hdr)); + } else { + // Unclear what matters here. 
+ DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER); + Sym.Hdr.Register = DefRange.CVRegister; + Sym.Hdr.MayHaveNoName = 0; + ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER); + BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), + sizeof(SymKind)); + BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr), + sizeof(Sym.Hdr)); + } } OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix); } @@ -1983,6 +2189,15 @@ void CodeViewDebug::emitDebugInfoForUDTs( } void CodeViewDebug::emitDebugInfoForGlobals() { + DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *> + GlobalMap; + for (const GlobalVariable &GV : MMI->getModule()->globals()) { + SmallVector<DIGlobalVariableExpression *, 1> GVEs; + GV.getDebugInfo(GVEs); + for (const auto *GVE : GVEs) + GlobalMap[GVE] = &GV; + } + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); for (const MDNode *Node : CUs->operands()) { const auto *CU = cast<DICompileUnit>(Node); @@ -1992,31 +2207,32 @@ void CodeViewDebug::emitDebugInfoForGlobals() { // it if we have at least one global to emit. switchToDebugSectionForSymbol(nullptr); MCSymbol *EndLabel = nullptr; - for (const DIGlobalVariable *G : CU->getGlobalVariables()) { - if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) { + for (const auto *GVE : CU->getGlobalVariables()) { + if (const auto *GV = GlobalMap.lookup(GVE)) if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { if (!EndLabel) { OS.AddComment("Symbol subsection for globals"); EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); } - emitDebugInfoForGlobal(G, Asm->getSymbol(GV)); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); } - } } if (EndLabel) endCVSubsection(EndLabel); // Second, emit each global that is in a comdat into its own .debug$S // section along with its own symbol substream. - for (const DIGlobalVariable *G : CU->getGlobalVariables()) { - if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) { + for (const auto *GVE : CU->getGlobalVariables()) { + if (const auto *GV = GlobalMap.lookup(GVE)) { if (GV->hasComdat()) { MCSymbol *GVSym = Asm->getSymbol(GV); OS.AddComment("Symbol subsection for " + Twine(GlobalValue::getRealLinkageName(GV->getName()))); switchToDebugSectionForSymbol(GVSym); EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); - emitDebugInfoForGlobal(G, GVSym); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); endCVSubsection(EndLabel); } } @@ -2037,6 +2253,7 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() { } void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, + const GlobalVariable *GV, MCSymbol *GVSym) { // DataSym record, see SymbolRecord.h for more info. 
// FIXME: Thread local data, etc @@ -2045,7 +2262,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, OS.AddComment("Record length"); OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2); OS.EmitLabel(DataBegin); - const auto *GV = cast<GlobalVariable>(DIGV->getVariable()); if (DIGV->isLocalToUnit()) { if (GV->isThreadLocal()) { OS.AddComment("Record kind: S_LTHREAD32"); @@ -2066,7 +2282,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, OS.AddComment("Type"); OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); OS.AddComment("DataOffset"); - OS.EmitCOFFSecRel32(GVSym); + OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index e4bbd61..3dd4315 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -20,8 +20,8 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCStreamer.h" @@ -36,7 +36,8 @@ struct ClassInfo; /// \brief Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; - codeview::MemoryTypeTableBuilder TypeTable; + llvm::BumpPtrAllocator Allocator; + codeview::TypeTableBuilder TypeTable; /// Represents the most general definition range. struct LocalVarDefRange { @@ -47,9 +48,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Offset of variable data in memory. int DataOffset : 31; - /// Offset of the data into the user level struct. If zero, no splitting - /// occurred. - uint16_t StructOffset; + /// Non-zero if this is a piece of an aggregate. + uint16_t IsSubfield : 1; + + /// Offset into aggregate. + uint16_t StructOffset : 15; /// Register containing the data or the register base of the memory /// location containing the data. @@ -59,14 +62,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// ranges. bool isDifferentLocation(LocalVarDefRange &O) { return InMemory != O.InMemory || DataOffset != O.DataOffset || - StructOffset != O.StructOffset || CVRegister != O.CVRegister; + IsSubfield != O.IsSubfield || StructOffset != O.StructOffset || + CVRegister != O.CVRegister; } SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges; }; static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); - static LocalVarDefRange createDefRangeReg(uint16_t CVRegister); + static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister, + bool InMemory, int Offset, + bool IsSubfield, + uint16_t StructOffset); /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific. 
struct LocalVariable { @@ -190,6 +197,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeInformation(); + void emitCompilerInformation(); + void emitInlineeLinesSubsection(); void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI); @@ -201,7 +210,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForUDTs( ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs); - void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym); + void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, + const GlobalVariable *GV, MCSymbol *GVSym); /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is @@ -217,7 +227,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void collectVariableInfo(const DISubprogram *SP); - void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed); + void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed); /// Records information about a local variable in the appropriate scope. In /// particular, locals from inlined code live inside the inlining site. @@ -251,6 +261,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty); + codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 2aaa85a..8799189 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -63,10 +63,10 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit its Dwarf tag type. - AP->EmitULEB128(Tag, dwarf::TagString(Tag)); + AP->EmitULEB128(Tag, dwarf::TagString(Tag).data()); // Emit whether it has children DIEs. - AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children)); + AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data()); // For each attribute description. for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -74,11 +74,18 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit attribute type. AP->EmitULEB128(AttrData.getAttribute(), - dwarf::AttributeString(AttrData.getAttribute())); + dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. AP->EmitULEB128(AttrData.getForm(), - dwarf::FormEncodingString(AttrData.getForm())); + dwarf::FormEncodingString(AttrData.getForm()).data()); + + // Emit value for DW_FORM_implicit_const. + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) { + assert(AP->getDwarfVersion() >= 5 && + "DW_FORM_implicit_const is supported starting from DWARFv5"); + AP->EmitSLEB128(AttrData.getValue()); + } } // Mark end of abbreviation. 
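Editor's note on the DW_FORM_implicit_const handling added above: in DWARF v5 the constant is stored once in the abbreviation declaration (an SLEB128 following the attribute/form pair), and DIEs that use the abbreviation contribute no bytes for that attribute. A schematic of the resulting .debug_abbrev entry, written as C++ comments; the layout is illustrative only (not actual dumper output) and the attribute choice is hypothetical:

    // abbrev code 2, DW_TAG_variable, DW_CHILDREN_no
    //   DW_AT_decl_file   DW_FORM_implicit_const   value: 1   <- value lives in the abbrev
    //   DW_AT_name        DW_FORM_strp
    //   0, 0                                                  <- terminating attribute/form pair
    // A DIE using abbrev 2 emits only its abbrev code and the strp offset;
    // the implicit_const attribute costs zero bytes per DIE, which is why the
    // size routines below return 0 for this form.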
@@ -108,24 +115,73 @@ void DIEAbbrev::print(raw_ostream &O) { LLVM_DUMP_METHOD void DIEAbbrev::dump() { print(dbgs()); } +//===----------------------------------------------------------------------===// +// DIEAbbrevSet Implementation +//===----------------------------------------------------------------------===// + +DIEAbbrevSet::~DIEAbbrevSet() { + for (DIEAbbrev *Abbrev : Abbreviations) + Abbrev->~DIEAbbrev(); +} + +DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) { + + FoldingSetNodeID ID; + DIEAbbrev Abbrev = Die.generateAbbrev(); + Abbrev.Profile(ID); + + void *InsertPos; + if (DIEAbbrev *Existing = + AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { + Die.setAbbrevNumber(Existing->getNumber()); + return *Existing; + } + + // Move the abbreviation to the heap and assign a number. + DIEAbbrev *New = new (Alloc) DIEAbbrev(std::move(Abbrev)); + Abbreviations.push_back(New); + New->setNumber(Abbreviations.size()); + Die.setAbbrevNumber(Abbreviations.size()); + + // Store it for lookup. + AbbreviationsSet.InsertNode(New, InsertPos); + return *New; +} + +void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const { + if (!Abbreviations.empty()) { + // Start the debug abbrev section. + AP->OutStreamer->SwitchSection(Section); + AP->emitDwarfAbbrevs(Abbreviations); + } +} + +//===----------------------------------------------------------------------===// +// DIE Implementation +//===----------------------------------------------------------------------===// + +DIE *DIE::getParent() const { + return Owner.dyn_cast<DIE*>(); +} + DIEAbbrev DIE::generateAbbrev() const { DIEAbbrev Abbrev(Tag, hasChildren()); for (const DIEValue &V : values()) - Abbrev.AddAttribute(V.getAttribute(), V.getForm()); + if (V.getForm() == dwarf::DW_FORM_implicit_const) + Abbrev.AddImplicitConstAttribute(V.getAttribute(), + V.getDIEInteger().getValue()); + else + Abbrev.AddAttribute(V.getAttribute(), V.getForm()); return Abbrev; } -/// Climb up the parent chain to get the unit DIE to which this DIE -/// belongs. -const DIE *DIE::getUnit() const { - const DIE *Cu = getUnitOrNull(); - assert(Cu && "We should not have orphaned DIEs."); - return Cu; +unsigned DIE::getDebugSectionOffset() const { + const DIEUnit *Unit = getUnit(); + assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset"); + return Unit->getDebugSectionOffset() + getOffset(); } -/// Climb up the parent chain to get the unit DIE this DIE belongs -/// to. Return NULL if DIE is not added to an owner yet. -const DIE *DIE::getUnitOrNull() const { +const DIE *DIE::getUnitDie() const { const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit || @@ -136,6 +192,13 @@ const DIE *DIE::getUnitOrNull() const { return nullptr; } +const DIEUnit *DIE::getUnit() const { + const DIE *UnitDie = getUnitDie(); + if (UnitDie) + return UnitDie->Owner.dyn_cast<DIEUnit*>(); + return nullptr; +} + DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const { // Iterate through all the attributes until we find the one we're // looking for, if we can't find it return NULL. @@ -191,6 +254,55 @@ void DIE::dump() { print(dbgs()); } +unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, + DIEAbbrevSet &AbbrevSet, + unsigned CUOffset) { + // Unique the abbreviation and fill in the abbreviation number so this DIE + // can be emitted. + const DIEAbbrev &Abbrev = AbbrevSet.uniqueAbbreviation(*this); + + // Set compile/type unit relative offset of this DIE. 
+ setOffset(CUOffset); + + // Add the byte size of the abbreviation code. + CUOffset += getULEB128Size(getAbbrevNumber()); + + // Add the byte size of all the DIE attribute values. + for (const auto &V : values()) + CUOffset += V.SizeOf(AP); + + // Let the children compute their offsets and abbreviation numbers. + if (hasChildren()) { + (void)Abbrev; + assert(Abbrev.hasChildren() && "Children flag not set"); + + for (auto &Child : children()) + CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset); + + // Each child chain is terminated with a zero byte, adjust the offset. + CUOffset += sizeof(int8_t); + } + + // Compute the byte size of this DIE and all of its children correctly. This + // is needed so that top level DIE can help the compile unit set its length + // correctly. + setSize(CUOffset - getOffset()); + return CUOffset; +} + +//===----------------------------------------------------------------------===// +// DIEUnit Implementation +//===----------------------------------------------------------------------===// +DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) + : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V), + AddrSize(A) +{ + Die.Owner = this; + assert((UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_type_unit || + UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG"); +} + void DIEValue::EmitValue(const AsmPrinter *AP) const { switch (Ty) { case isNone: @@ -240,67 +352,121 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { - unsigned Size = ~0U; switch (Form) { + case dwarf::DW_FORM_implicit_const: + LLVM_FALLTHROUGH; case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? 
Asm->OutStreamer->AddBlankLine(); return; - case dwarf::DW_FORM_flag: // Fall thru - case dwarf::DW_FORM_ref1: // Fall thru - case dwarf::DW_FORM_data1: Size = 1; break; - case dwarf::DW_FORM_ref2: // Fall thru - case dwarf::DW_FORM_data2: Size = 2; break; - case dwarf::DW_FORM_sec_offset: // Fall thru - case dwarf::DW_FORM_strp: // Fall thru - case dwarf::DW_FORM_ref4: // Fall thru - case dwarf::DW_FORM_data4: Size = 4; break; - case dwarf::DW_FORM_ref8: // Fall thru - case dwarf::DW_FORM_ref_sig8: // Fall thru - case dwarf::DW_FORM_data8: Size = 8; break; - case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; - case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; - case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; - case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_flag: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref1: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data1: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref2: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data2: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref4: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data4: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref8: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sig8: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data8: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_ref_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_strp_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_line_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_sec_offset: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp_sup: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sup: + LLVM_FALLTHROUGH; case dwarf::DW_FORM_addr: - Size = Asm->getPointerSize(); - break; + LLVM_FALLTHROUGH; case dwarf::DW_FORM_ref_addr: - Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); - break; + Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form)); + return; + case dwarf::DW_FORM_GNU_str_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_addr_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_udata: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_udata: + Asm->EmitULEB128(Integer); + return; + case dwarf::DW_FORM_sdata: + Asm->EmitSLEB128(Integer); + return; default: llvm_unreachable("DIE Value form not supported yet"); } - Asm->OutStreamer->EmitIntValue(Integer, Size); } /// SizeOf - Determine size of integer value in bytes. 
/// unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { + case dwarf::DW_FORM_implicit_const: LLVM_FALLTHROUGH; case dwarf::DW_FORM_flag_present: return 0; - case dwarf::DW_FORM_flag: // Fall thru - case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data1: return sizeof(int8_t); - case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data2: return sizeof(int16_t); - case dwarf::DW_FORM_sec_offset: // Fall thru - case dwarf::DW_FORM_strp: // Fall thru - case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data4: return sizeof(int32_t); - case dwarf::DW_FORM_ref8: // Fall thru - case dwarf::DW_FORM_ref_sig8: // Fall thru + case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer); - case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer); - case dwarf::DW_FORM_udata: return getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: - return AP->getPointerSize(); case dwarf::DW_FORM_ref_addr: - if (AP->OutStreamer->getContext().getDwarfVersion() == 2) + if (AP->getDwarfVersion() == 2) return AP->getPointerSize(); - return sizeof(int32_t); + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_ref_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_strp_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_line_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_sec_offset: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp_sup: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sup: + switch (AP->OutStreamer->getContext().getDwarfFormat()) { + case dwarf::DWARF32: + return 4; + case dwarf::DWARF64: + return 8; + } + llvm_unreachable("Invalid DWARF format"); + case dwarf::DW_FORM_GNU_str_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_addr_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_udata: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_udata: + return getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: + return getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: + return AP->getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -318,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); + AP->EmitDebugValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. @@ -343,7 +509,8 @@ void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelReference(Label, SizeOf(AP, Form), Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || - Form == dwarf::DW_FORM_ref_addr); + Form == dwarf::DW_FORM_ref_addr || + Form == dwarf::DW_FORM_data4); } /// SizeOf - Determine size of label value in bytes. 
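Editor's note on the form-size logic above: two concrete data points may help (ordinary DWARF facts, not specific to this change). Section-offset forms such as DW_FORM_strp and DW_FORM_sec_offset are 4 bytes under DWARF32 and 8 bytes under DWARF64, while the LEB128 forms scale with the encoded value. A minimal sketch using the LEB128 size helpers from llvm/Support/LEB128.h that the code above already calls:

    // Unsigned LEB128: one byte per 7 bits of payload.
    getULEB128Size(100);   // == 1  (fits in one 7-bit group)
    getULEB128Size(300);   // == 2  (needs two groups)
    // Signed LEB128: the sign bit must fit in the last group.
    getSLEB128Size(-64);   // == 1
    getSLEB128Size(-65);   // == 2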
@@ -435,6 +602,29 @@ void DIEString::print(raw_ostream &O) const { } //===----------------------------------------------------------------------===// +// DIEInlineString Implementation +//===----------------------------------------------------------------------===// +void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_string) { + for (char ch : S) + AP->EmitInt8(ch); + AP->EmitInt8(0); + return; + } + llvm_unreachable("Expected valid string form"); +} + +unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + // Emit string bytes + NULL byte. + return S.size() + 1; +} + +LLVM_DUMP_METHOD +void DIEInlineString::print(raw_ostream &O) const { + O << "InlineString: " << S; +} + +//===----------------------------------------------------------------------===// // DIEEntry Implementation //===----------------------------------------------------------------------===// @@ -442,35 +632,69 @@ void DIEString::print(raw_ostream &O) const { /// void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_ref_addr) { - const DwarfDebug *DD = AP->getDwarfDebug(); - unsigned Addr = Entry->getOffset(); - assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); - // For DW_FORM_ref_addr, output the offset from beginning of debug info - // section. Entry->getOffset() returns the offset from start of the - // compile unit. - DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit()); - assert(CU && "CUDie should belong to a CU."); - Addr += CU->getDebugInfoOffset(); - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) - AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, - DIEEntry::getRefAddrSize(AP)); - else - AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); - } else - AP->EmitInt32(Entry->getOffset()); -} - -unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { - // DWARF4: References that use the attribute form DW_FORM_ref_addr are - // specified to be four bytes in the DWARF 32-bit format and eight bytes - // in the DWARF 64-bit format, while DWARF Version 2 specifies that such - // references have the same size as an address on the target system. - const DwarfDebug *DD = AP->getDwarfDebug(); - assert(DD && "Expected Dwarf Debug info to be available"); - if (DD->getDwarfVersion() == 2) - return AP->getPointerSize(); - return sizeof(int32_t); + switch (Form) { + case dwarf::DW_FORM_ref1: + case dwarf::DW_FORM_ref2: + case dwarf::DW_FORM_ref4: + case dwarf::DW_FORM_ref8: + AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form)); + return; + + case dwarf::DW_FORM_ref_udata: + AP->EmitULEB128(Entry->getOffset()); + return; + + case dwarf::DW_FORM_ref_addr: { + // Get the absolute offset for this DIE within the debug info/types section. 
+ unsigned Addr = Entry->getDebugSectionOffset(); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { + const DwarfDebug *DD = AP->getDwarfDebug(); + if (DD) + assert(!DD->useSplitDwarf() && + "TODO: dwo files can't have relocations."); + const DIEUnit *Unit = Entry->getUnit(); + assert(Unit && "CUDie should belong to a CU."); + MCSection *Section = Unit->getSection(); + if (Section) { + const MCSymbol *SectionSym = Section->getBeginSymbol(); + AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + return; + } + } + AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); + return; + } + default: + llvm_unreachable("Improper form for DIE reference"); + } +} + +unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_ref1: + return 1; + case dwarf::DW_FORM_ref2: + return 2; + case dwarf::DW_FORM_ref4: + return 4; + case dwarf::DW_FORM_ref8: + return 8; + case dwarf::DW_FORM_ref_udata: + return getULEB128Size(Entry->getOffset()); + case dwarf::DW_FORM_ref_addr: + if (AP->getDwarfVersion() == 2) + return AP->getPointerSize(); + switch (AP->OutStreamer->getContext().getDwarfFormat()) { + case dwarf::DWARF32: + return 4; + case dwarf::DWARF64: + return 8; + } + llvm_unreachable("Invalid DWARF format"); + + default: + llvm_unreachable("Improper form for DIE reference"); + } } LLVM_DUMP_METHOD diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 74c47d1..d8ecc7c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -330,6 +330,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { addULEB128(dwarf::DW_FORM_string); addString(Value.getDIEString().getString()); break; + case DIEValue::isInlineString: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_string); + addString(Value.getDIEInlineString().getString()); + break; case DIEValue::isBlock: case DIEValue::isLoc: case DIEValue::isLocList: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index adc536f..22fd7bb 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -31,7 +31,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); assert(MI.getNumOperands() == 4); // If location of variable is described using a register (directly or - // indirecltly), this register is always a first operand. + // indirectly), this register is always a first operand. return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } @@ -83,7 +83,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; - const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var); + const auto &VarPos = find(VarSet, Var); assert(VarPos != VarSet.end()); VarSet.erase(VarPos); // Don't keep empty sets in a map to keep it as small as possible. 
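Editor's note: several hunks here and below replace explicit std::find calls with the range helpers from llvm/ADT/STLExtras.h. A minimal illustration of the two helpers; the container and values are made up for the example, and it assumes the code is inside namespace llvm (or uses llvm:: qualification):

    SmallVector<unsigned, 4> VarSet = {7, 11, 13};
    if (is_contained(VarSet, 11))        // equivalent to std::find(...) != VarSet.end()
      VarSet.erase(find(VarSet, 11));    // find() is the iterator-returning form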
@@ -96,7 +96,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, InlinedVariable Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; - assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); + assert(!is_contained(VarSet, Var)); VarSet.push_back(Var); } @@ -134,8 +134,8 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { // as the return instruction. DebugLoc LastLoc = LastMI->getDebugLoc(); auto Res = LastMI; - for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)), - E = MBB.rend(); + for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(), + E = MBB.rend(); I != E; ++I) { if (I->getDebugLoc() != LastLoc) return &*Res; @@ -164,7 +164,9 @@ static void collectChangingRegs(const MachineFunction *MF, // Look for register defs and register masks. Register masks are // typically on calls and they clobber everything not in the mask. for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Skip virtual registers since they are handled by the parent. + if (MO.isReg() && MO.isDef() && MO.getReg() && + !TRI->isVirtualRegister(MO.getReg())) { for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) Regs.set(*AI); @@ -192,12 +194,18 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // some variables. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg()) { + // If this is a virtual register, only clobber it since it doesn't + // have aliases. + if (TRI->isVirtualRegister(MO.getReg())) + clobberRegisterUses(RegVars, MO.getReg(), Result, MI); // If this is a register def operand, it may end a debug value // range. - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) - if (ChangingRegs.test(*AI)) - clobberRegisterUses(RegVars, *AI, Result, MI); + else { + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) + if (ChangingRegs.test(*AI)) + clobberRegisterUses(RegVars, *AI, Result, MI); + } } else if (MO.isRegMask()) { // If this is a register mask operand, clobber all debug values in // non-CSRs. @@ -238,7 +246,8 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, if (!MBB.empty() && &MBB != &MF->back()) { for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) { auto CurElem = I++; // CurElem can be erased below. - if (ChangingRegs.test(CurElem->first)) + if (TRI->isVirtualRegister(CurElem->first) || + ChangingRegs.test(CurElem->first)) clobberRegisterUses(RegVars, CurElem, Result, MBB.back()); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 16ffe2e..9419098 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -62,14 +63,14 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } -// Determine the relative position of the pieces described by P1 and P2. -// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, -// 1 if P1 is entirely after P2. 
-int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) { - unsigned l1 = P1->getBitPieceOffset(); - unsigned l2 = P2->getBitPieceOffset(); - unsigned r1 = l1 + P1->getBitPieceSize(); - unsigned r2 = l2 + P2->getBitPieceSize(); +int DebugHandlerBase::fragmentCmp(const DIExpression *P1, + const DIExpression *P2) { + auto Fragment1 = *P1->getFragmentInfo(); + auto Fragment2 = *P2->getFragmentInfo(); + unsigned l1 = Fragment1.OffsetInBits; + unsigned l2 = Fragment2.OffsetInBits; + unsigned r1 = l1 + Fragment1.SizeInBits; + unsigned r2 = l2 + Fragment2.SizeInBits; if (r1 <= l2) return -1; else if (r2 <= l1) @@ -78,11 +79,11 @@ int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) { return 0; } -/// Determine whether two variable pieces overlap. -bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) { - if (!P1->isBitPiece() || !P2->isBitPiece()) +bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1, + const DIExpression *P2) { + if (!P1->isFragment() || !P2->isFragment()) return true; - return pieceCmp(P1, P2) == 0; + return fragmentCmp(P1, P2) == 0; } /// If this type is derived from a base type then return base type size. @@ -97,7 +98,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_restrict_type) + Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type) return DDTy->getSizeInBits(); DIType *BaseType = DDTy->getBaseType().resolve(); @@ -141,14 +142,15 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { if (DIVar->isParameter() && getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); - if (Ranges.front().first->getDebugExpression()->isBitPiece()) { - // Mark all non-overlapping initial pieces. + if (Ranges.front().first->getDebugExpression()->isFragment()) { + // Mark all non-overlapping initial fragments. for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { - const DIExpression *Piece = I->first->getDebugExpression(); + const DIExpression *Fragment = I->first->getDebugExpression(); if (std::all_of(Ranges.begin(), I, [&](DbgValueHistoryMap::InstrRange Pred) { - return !piecesOverlap(Piece, Pred.first->getDebugExpression()); - })) + return !fragmentsOverlap( + Fragment, Pred.first->getDebugExpression()); + })) LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else break; @@ -200,8 +202,10 @@ void DebugHandlerBase::endInstruction() { assert(CurMI != nullptr); // Don't create a new label after DBG_VALUE instructions. // They don't generate code. - if (!CurMI->isDebugValue()) + if (!CurMI->isDebugValue()) { PrevLabel = nullptr; + PrevInstBB = CurMI->getParent(); + } DenseMap<const MachineInstr *, MCSymbol *>::iterator I = LabelsAfterInsn.find(CurMI); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h index b8bbcec..c00fa18 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h @@ -38,10 +38,12 @@ protected: MachineModuleInfo *MMI; /// Previous instruction's location information. This is used to - /// determine label location to indicate scope boundries in dwarf - /// debug info. + /// determine label location to indicate scope boundaries in debug info. 
+ /// We track the previous instruction's source location (if not line 0), + /// whether it was a label, and its parent BB. DebugLoc PrevInstLoc; MCSymbol *PrevLabel = nullptr; + const MachineBasicBlock *PrevInstBB = nullptr; /// This location indicates end of function prologue and beginning of /// function body. @@ -92,13 +94,13 @@ public: /// Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// Determine the relative position of the pieces described by P1 and P2. - /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, - /// 1 if P1 is entirely after P2. - static int pieceCmp(const DIExpression *P1, const DIExpression *P2); + /// Determine the relative position of the fragments described by P1 and P2. + /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is + /// entirely after P2. + static int fragmentCmp(const DIExpression *P1, const DIExpression *P2); - /// Determine whether two variable pieces overlap. - static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2); + /// Determine whether two variable fragments overlap. + static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2); /// If this type is derived from a base type then return base type size. static uint64_t getBaseTypeSize(const DITypeRef TyRef); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 20acd45..36fb150 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -72,7 +72,7 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - bool isBitPiece() const { return getExpression()->isBitPiece(); } + bool isFragment() const { return getExpression()->isFragment(); } const DIExpression *getExpression() const { return Expression; } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); @@ -128,8 +128,8 @@ public: void addValues(ArrayRef<DebugLocEntry::Value> Vals) { Values.append(Vals.begin(), Vals.end()); sortUniqueValues(); - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ - return V.isBitPiece(); + assert(all_of(Values, [](DebugLocEntry::Value V) { + return V.isFragment(); }) && "value must be a piece"); } @@ -172,11 +172,11 @@ inline bool operator==(const DebugLocEntry::Value &A, llvm_unreachable("unhandled EntryKind"); } -/// \brief Compare two pieces based on their offset. +/// Compare two fragments based on their offset. 
inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression()->getBitPieceOffset() < - B.getExpression()->getBitPieceOffset(); + return A.getExpression()->getFragmentInfo()->OffsetInBits < + B.getExpression()->getFragmentInfo()->OffsetInBits; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 4ad3e18..9c324ea 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -221,9 +221,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset - DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit()); - assert(CU && "Accelerated DIE should belong to a CU."); - Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset()); + Asm->EmitInt32(HD->Die->getDebugSectionOffset()); // If we have multiple Atoms emit that info too. // FIXME: A bit of a hack, we either emit only one atom or all info. if (HeaderData.Atoms.size() > 1) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 4d81441..05ac1cb 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -126,8 +126,7 @@ public: uint16_t type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form) - : type(type), form(form) {} + constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {} #ifndef NDEBUG void print(raw_ostream &O) { O << "Type: " << dwarf::AtomTypeString(type) << "\n" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 2eae1b2..e08306b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -39,16 +39,16 @@ using namespace llvm; DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) - : EHStreamer(A), shouldEmitCFI(false) {} + : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {} void DwarfCFIExceptionBase::markFunctionEnd() { endFragment(); - if (MMI->getLandingPads().empty()) - return; - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (!Asm->MF->getLandingPads().empty()) { + MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF); + NonConstMF->tidyLandingPads(); + } } void DwarfCFIExceptionBase::endFragment() { @@ -59,7 +59,7 @@ void DwarfCFIExceptionBase::endFragment() { DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), forceEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} + shouldEmitMoves(false) {} DwarfCFIException::~DwarfCFIException() {} @@ -70,9 +70,6 @@ void DwarfCFIException::endModule() { if (!Asm->MAI->usesCFIForEH()) return; - if (moveTypeModule == AsmPrinter::CFI_M_Debug) - Asm->OutStreamer->EmitCFISections(false, true); - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); @@ -98,14 +95,10 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const Function *F = MF->getFunction(); // If any landing pads survive, we need an EH table. 
- bool hasLandingPads = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MF->getLandingPads().empty(); // See if we need frame move info. AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); - if (MoveType == AsmPrinter::CFI_M_EH || - (MoveType == AsmPrinter::CFI_M_Debug && - moveTypeModule == AsmPrinter::CFI_M_None)) - moveTypeModule = MoveType; shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; @@ -143,6 +136,12 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!shouldEmitCFI) return; + if (!hasEmittedCFISections) { + if (Asm->needsOnlyDebugCFIMoves()) + Asm->OutStreamer->EmitCFISections(false, true); + hasEmittedCFISections = true; + } + Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. @@ -160,8 +159,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI); Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); // Provide LSDA information. @@ -171,7 +169,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, /// endFunction - Gather and emit post-function exception information. /// -void DwarfCFIException::endFunction(const MachineFunction *) { +void DwarfCFIException::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality) return; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 7822814c..d904372 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -73,36 +73,8 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); } -// Return const expression if value is a GEP to access merged global -// constant. e.g. -// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) -static const ConstantExpr *getMergedGlobalExpr(const Value *V) { - const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); - if (!CE || CE->getNumOperands() != 3 || - CE->getOpcode() != Instruction::GetElementPtr) - return nullptr; - - // First operand points to a global struct. - Value *Ptr = CE->getOperand(0); - GlobalValue *GV = dyn_cast<GlobalValue>(Ptr); - if (!GV || !isa<StructType>(GV->getValueType())) - return nullptr; - - // Second operand is zero. - const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); - if (!CI || !CI->isZero()) - return nullptr; - - // Third operand is offset. - if (!isa<ConstantInt>(CE->getOperand(2))) - return nullptr; - - return CE; -} - -/// getOrCreateGlobalVariableDIE - get or create global variable DIE. DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( - const DIGlobalVariable *GV) { + const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) { // Check for pre-existence. if (DIE *Die = getDIE(GV)) return Die; @@ -126,6 +98,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( // We need the declaration DIE that is in the static member's class. 
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); + // If the global variable's type is different from the one in the class + // member type, assume that it's more specific and also emit it. + if (GTy != DD->resolve(SDMDecl->getBaseType())) + addType(*VariableDIE, GTy); } else { DeclContext = GV->getScope(); // Add name and type. @@ -145,73 +121,82 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( else addGlobalName(GV->getName(), *VariableDIE, DeclContext); + if (uint32_t AlignInBytes = GV->getAlignInBytes()) + addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + // Add location. bool addToAccelTable = false; - if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { - // We cannot describe the location of dllimport'd variables: the computation - // of their address requires loads from the IAT. - if (!Global->hasDLLImportStorageClass()) { + DIELoc *Loc = nullptr; + std::unique_ptr<DIEDwarfExpression> DwarfExpr; + bool AllConstant = std::all_of( + GlobalExprs.begin(), GlobalExprs.end(), + [&](const GlobalExpr GE) { + return GE.Expr && GE.Expr->isConstant(); + }); + + for (const auto &GE : GlobalExprs) { + const GlobalVariable *Global = GE.Var; + const DIExpression *Expr = GE.Expr; + // For compatibility with DWARF 3 and earlier, + // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes + // DW_AT_const_value(X). + if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) { + addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1)); + // We cannot describe the location of dllimport'd variables: the + // computation of their address requires loads from the IAT. + } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) { + if (!Loc) { + Loc = new (DIEValueAllocator) DIELoc; + DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); + } addToAccelTable = true; - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - const MCSymbol *Sym = Asm->getSymbol(Global); - if (Global->isThreadLocal()) { - if (Asm->TM.Options.EmulatedTLS) { - // TODO: add debug info for emulated thread local mode. - } else { - // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 - ? dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + if (Global) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { + if (Asm->TM.Options.EmulatedTLS) { + // TODO: add debug info for emulated thread local mode. } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + // FIXME: Make this work with -gsplit-dwarf. 
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } - // 3) followed by an OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, - DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address - : dwarf::DW_OP_form_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); } - } else { - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); } - - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - if (DD->useAllLinkageNames()) - addLinkageName(*VariableDIE, GV->getLinkageName()); - } - } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV->getVariable())) { - addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { - auto *Ptr = cast<GlobalValue>(CE->getOperand(0)); - if (!Ptr->hasDLLImportStorageClass()) { - addToAccelTable = true; - // GV is a merged global. - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - MCSymbol *Sym = Asm->getSymbol(Ptr); - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); - addUInt(*Loc, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(), - Idx)); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + if (Expr) { + DwarfExpr->addFragmentOffset(Expr); + DwarfExpr->AddExpression(Expr); + } } } + if (Loc) + addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); + + if (DD->useAllLinkageNames()) + addLinkageName(*VariableDIE, GV->getLinkageName()); if (addToAccelTable) { DD->addAccelName(GV->getName(), *VariableDIE); @@ -265,7 +250,7 @@ void DwarfCompileUnit::initStmtList() { // is not okay to use line_table_start here. 
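The thread-local case above assembles a small DWARF expression by hand. A minimal sketch of the byte sequence it produces for a 64-bit target without split DWARF, assuming the TLS offset has already been resolved to a plain integer (the real code instead emits a relocation obtained from getDebugThreadLocalSymbol):

#include <cstdint>
#include <vector>

// Illustration only: DW_AT_location bytes for a thread-local global with
// 64-bit pointers and no split DWARF. 0x0e is DW_OP_const8u and 0x9b is
// DW_OP_form_tls_address (the GNU fallback DW_OP_GNU_push_tls_address is 0xe0).
std::vector<uint8_t> sketchTLSLocation(uint64_t TLSBlockOffset) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x0e);                                    // 1) DW_OP_const8u
  for (int Byte = 0; Byte < 8; ++Byte)                     // 2) offset of the variable
    Expr.push_back((TLSBlockOffset >> (8 * Byte)) & 0xff); //    within the TLS block
  Expr.push_back(0x9b);                                    // 3) DW_OP_form_tls_address
  return Expr;
}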
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); StmtListValue = - addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, + addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym, TLOF.getDwarfLineSection()->getBeginSymbol()); } @@ -450,7 +435,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); - if (IA->getDiscriminator()) + if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, IA->getDiscriminator()); @@ -521,9 +506,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); // If there is an expression, emit raw unsigned bytes. + DwarfExpr.addFragmentOffset(Expr); DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm()); - DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); - addBlock(*VariableDie, dwarf::DW_AT_location, Loc); + DwarfExpr.AddExpression(Expr); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); } else addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); } else if (DVInsn->getOperand(0).isFPImm()) @@ -536,23 +522,21 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, } // .. else use frame index. - if (DV.getFrameIndex().empty()) + if (!DV.hasFrameIndexExprs()) return VariableDie; - auto Expr = DV.getExpression().begin(); DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - for (auto FI : DV.getFrameIndex()) { + for (auto &Fragment : DV.getFrameIndexExprs()) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); + int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); + DwarfExpr.addFragmentOffset(Fragment.Expr); DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), FrameReg, Offset); - DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); - ++Expr; + DwarfExpr.AddExpression(Fragment.Expr); } - addBlock(*VariableDie, dwarf::DW_AT_location, Loc); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); return VariableDie; } @@ -585,25 +569,22 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, return ObjectPointer; } -void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { - assert(Scope && Scope->getScopeNode()); - assert(!Scope->getInlinedAt()); - assert(!Scope->isAbstractScope()); - auto *Sub = cast<DISubprogram>(Scope->getScopeNode()); - - DD->getProcessedSPNodes().insert(Sub); - +void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) { DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); + if (Scope) { + assert(!Scope->getInlinedAt()); + assert(!Scope->isAbstractScope()); + // Collect lexical scope children first. + // ObjectPointer might be a local (non-argument) local variable if it's a + // block's synthetic this pointer. 
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) + addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + } + // If this is a variadic function, add an unspecified parameter. DITypeRefArray FnArgs = Sub->getType()->getTypeArray(); - // Collect lexical scope children first. - // ObjectPointer might be a local (non-argument) local variable if it's a - // block's synthetic this pointer. - if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) - addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); - // If we have a single element of null, it is a function that returns void. // If we have more than one elements and the last one is null, it is a // variadic function. @@ -674,7 +655,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( else if (auto *T = dyn_cast<DIType>(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) - EntityDie = getOrCreateGlobalVariableDIE(GV); + EntityDie = getOrCreateGlobalVariableDIE(GV, {}); else EntityDie = getDIE(Entity); assert(EntityDie); @@ -695,11 +676,7 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); } else { - if (!D && !includeMinimalInlineScopes()) - // Lazily construct the subprogram if we didn't see either concrete or - // inlined versions during codegen. (except in -gmlt ^ where we want - // to omit these entirely) - D = getOrCreateSubprogramDIE(SP); + assert(D || includeMinimalInlineScopes()); if (D) // And attach the attributes applySubprogramAttributesToDefinition(SP, *D); @@ -750,18 +727,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression Expr(*Asm, *this, *Loc); bool validReg; if (Location.isReg()) - validReg = addRegisterOpPiece(*Loc, Location.getReg()); + validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg()); else - validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + validReg = + Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg(), Location.getOffset()); if (!validReg) return; // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); + addBlock(Die, Attribute, Expr.finalize()); } /// Start with the address based on the location provided, and generate the @@ -774,19 +755,22 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); const DIExpression *Expr = DV.getSingleExpression(); - bool ValidReg; + DIExpressionCursor ExprCursor(Expr); const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); - if (Location.getOffset()) { - ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(), - Location.getOffset()); - if (ValidReg) - DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); - } else - ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg()); + auto Reg = Location.getReg(); + DwarfExpr.addFragmentOffset(Expr); + bool ValidReg = + Location.getOffset() + ? 
DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset()) + : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg); + + if (!ValidReg) + return; + + DwarfExpr.AddExpression(std::move(ExprCursor)); // Now attach the location information to the DIE. - if (ValidReg) - addBlock(Die, Attribute, Loc); + addBlock(Die, Attribute, Loc); } /// Add a Dwarf loclistptr attribute data and value. @@ -802,7 +786,13 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, StringRef Name = Var.getName(); if (!Name.empty()) addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, Var.getVariable()); + const auto *DIVar = Var.getVariable(); + if (DIVar) + if (uint32_t AlignInBytes = DIVar->getAlignInBytes()) + addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + + addSourceLine(VariableDie, DIVar); addType(VariableDie, Var.getType()); if (Var.isArtificial()) addFlag(VariableDie, dwarf::DW_AT_artificial); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 90f74a3..a8025f1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -32,9 +32,6 @@ class DwarfCompileUnit : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; - /// Offset of the UnitDie from beginning of debug info section. - unsigned DebugInfoOffset = 0; - /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding /// the need to search for it in applyStmtList. DIE::value_iterator StmtListValue; @@ -84,8 +81,6 @@ public: DwarfDebug *DW, DwarfFile *DWU); unsigned getUniqueID() const { return UniqueID; } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } DwarfCompileUnit *getSkeleton() const { return Skeleton; @@ -96,8 +91,16 @@ public: /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); - /// getOrCreateGlobalVariableDIE - get or create global variable DIE. - DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV); + /// A pair of GlobalVariable and DIExpression. + struct GlobalExpr { + const GlobalVariable *Var; + const DIExpression *Expr; + }; + + /// Get or create global variable DIE. + DIE * + getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV, + ArrayRef<GlobalExpr> GlobalExprs); /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. @@ -176,7 +179,7 @@ public: unsigned *ChildScopeCount = nullptr); /// \brief Construct a DIE for this subprogram scope. - void constructSubprogramScopeDIE(LexicalScope *Scope); + void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); @@ -190,20 +193,15 @@ public: /// Set the skeleton unit associated with this unit. 
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } - const MCSymbol *getSectionSym() const { - assert(Section); - return Section->getBeginSymbol(); - } - unsigned getLength() { return sizeof(uint32_t) + // Length field - getHeaderSize() + UnitDie.getSize(); + getHeaderSize() + getUnitDie().getSize(); } void emitHeader(bool UseOffsets) override; MCSymbol *getLabelBegin() const { - assert(Section); + assert(getSection()); return LabelBegin; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7fba768..91a3d09 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -62,11 +62,6 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations( - "use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); - static cl::opt<bool> GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, cl::desc("Generate GNU-style pubnames and pubtypes"), @@ -81,12 +76,19 @@ namespace { enum DefaultOnOff { Default, Enable, Disable }; } +static cl::opt<DefaultOnOff> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::values(clEnumVal(Default, "At top of block or after label"), + clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")), + cl::init(Default)); + static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::desc("Output prototype dwarf accelerator tables."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), + clEnumVal(Disable, "Disabled")), cl::init(Default)); static cl::opt<DefaultOnOff> @@ -94,7 +96,7 @@ SplitDwarf("split-dwarf", cl::Hidden, cl::desc("Output DWARF5 split debug info."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), + clEnumVal(Disable, "Disabled")), cl::init(Default)); static cl::opt<DefaultOnOff> @@ -102,7 +104,7 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, cl::desc("Generate DWARF pubnames and pubtypes sections"), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), + clEnumVal(Disable, "Disabled")), cl::init(Default)); enum LinkageNameOption { @@ -117,12 +119,13 @@ static cl::opt<LinkageNameOption> "Default for platform"), clEnumValN(AllLinkageNames, "All", "All"), clEnumValN(AbstractLinkageNames, "Abstract", - "Abstract subprograms"), - clEnumValEnd), + "Abstract subprograms")), cl::init(DefaultLinkageNames)); -static const char *const DWARFGroupName = "DWARF Emission"; -static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const DWARFGroupName = "dwarf"; +static const char *const DWARFGroupDescription = "DWARF Emission"; +static const char *const DbgTimerName = "writer"; +static const char *const DbgTimerDescription = "DWARF Debug Writer"; void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { BS.EmitInt8( @@ -196,7 +199,16 @@ const DIType *DbgVariable::getType() const { return Ty; } -static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { +ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { + 
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), + [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { + return A.Expr->getFragmentInfo()->OffsetInBits < + B.Expr->getFragmentInfo()->OffsetInBits; + }); + return FrameIndexExprs; +} + +static const DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; @@ -205,7 +217,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()), InfoHolder(A, "info_string", DIEValueAllocator), SkeletonHolder(A, "skel_string", DIEValueAllocator), - IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + IsDarwin(A->TM.getTargetTriple().isOSDarwin()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -215,7 +227,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) { CurFn = nullptr; - Triple TT(Asm->getTargetTriple()); + const Triple &TT = Asm->TM.getTargetTriple(); // Make sure we know our "debugger tuning." The target option takes // precedence; fall back to triple-based defaults. @@ -255,7 +267,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames; unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; - DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber + unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); // Use dwarf 4 by default if nothing is requested. DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION; @@ -349,10 +361,11 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { return !getLabelAfterInsn(Ranges.front().second); } -template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) { +template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) { F(CU); if (auto *SkelCU = CU.getSkeleton()) - F(*SkelCU); + if (CU.getCUNode()->getSplitDebugInlining()) + F(*SkelCU); } void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { @@ -360,13 +373,13 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); - const MDNode *SP = Scope->getScopeNode(); + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); ProcessedSPNodes.insert(SP); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit()); + auto &CU = *CUMap.lookup(SP->getUnit()); forBothCUs(CU, [&](DwarfCompileUnit &CU) { CU.constructAbstractSubprogramScopeDIE(Scope); }); @@ -435,9 +448,9 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { } if (useSplitDwarf()) - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); else - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); if (DIUnit->getDWOId()) { // This CU is either a clang module DWO or a skeleton CU. 
@@ -449,8 +462,8 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { DIUnit->getSplitDebugFilename()); } - CUMap.insert(std::make_pair(DIUnit, &NewCU)); - CUDieMap.insert(std::make_pair(&Die, &NewCU)); + CUMap.insert({DIUnit, &NewCU}); + CUDieMap.insert({&Die, &NewCU}); return NewCU; } @@ -460,11 +473,34 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, D->addChild(TheCU.constructImportedEntityDIE(N)); } +/// Sort and unique GVEs by comparing their fragment offset. +static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> & +sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { + std::sort(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { + if (A.Expr != B.Expr && A.Expr && B.Expr) { + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (FragmentA && FragmentB) + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + } + return false; + }); + GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + return A.Expr == B.Expr; + }), + GVEs.end()); + return GVEs; +} + // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. void DwarfDebug::beginModule() { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName, + DWARFGroupDescription, TimePassesIsEnabled); if (DisableDebugInfoPrinting) return; @@ -475,13 +511,30 @@ void DwarfDebug::beginModule() { // Tell MMI whether we have debug info. MMI->setDebugInfoAvailability(NumDebugCUs > 0); SingleCU = NumDebugCUs == 1; + DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>> + GVMap; + for (const GlobalVariable &Global : M->globals()) { + SmallVector<DIGlobalVariableExpression *, 1> GVs; + Global.getDebugInfo(GVs); + for (auto *GVE : GVs) + GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); + } for (DICompileUnit *CUNode : M->debug_compile_units()) { DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) CU.addImportedEntity(IE); - for (auto *GV : CUNode->getGlobalVariables()) - CU.getOrCreateGlobalVariableDIE(GV); + + // Global Variables. + for (auto *GVE : CUNode->getGlobalVariables()) + GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()}); + DenseSet<DIGlobalVariable *> Processed; + for (auto *GVE : CUNode->getGlobalVariables()) { + DIGlobalVariable *GV = GVE->getVariable(); + if (Processed.insert(GV).second) + CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); + } + for (auto *Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. @@ -509,7 +562,7 @@ void DwarfDebug::finishVariableDefinitions() { // FIXME: Consider the time-space tradeoff of just storing the unit pointer // in the ConcreteVariables list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. 
- DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); + DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie()); assert(Unit); DbgVariable *AbsVar = getExistingAbstractVariable( InlinedVariable(Var->getVariable(), Var->getInlinedAt())); @@ -522,13 +575,11 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - for (auto &F : MMI->getModule()->functions()) - if (auto *SP = F.getSubprogram()) - if (ProcessedSPNodes.count(SP) && - SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) - forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(SP); - }); + for (const DISubprogram *SP : ProcessedSPNodes) + if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) + forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { + CU.finishSubprogramDefinition(SP); + }); } void DwarfDebug::finalizeModuleInfo() { @@ -715,10 +766,10 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( createAbstractVariable(Cleansed, Scope); } -// Collect variable information from side table maintained by MMI. -void DwarfDebug::collectVariableInfoFromMMITable( +// Collect variable information from side table maintained by MF. +void DwarfDebug::collectVariableInfoFromMFTable( DenseSet<InlinedVariable> &Processed) { - for (const auto &VI : MMI->getVariableDbgInfo()) { + for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && @@ -765,7 +816,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); } -/// \brief If this and Next are describing different pieces of the same +/// \brief If this and Next are describing different fragments of the same /// variable, merge them by appending Next's values to the current /// list of values. /// Return true if the merge was successful. @@ -773,15 +824,15 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { auto *FirstExpr = cast<DIExpression>(Values[0].Expression); auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression); - if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece()) + if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment()) return false; - // We can only merge entries if none of the pieces overlap any others. + // We can only merge entries if none of the fragments overlap any others. // In doing so, we can take advantage of the fact that both lists are // sorted. for (unsigned i = 0, j = 0; i < Values.size(); ++i) { for (; j < Next.Values.size(); ++j) { - int res = DebugHandlerBase::pieceCmp( + int res = DebugHandlerBase::fragmentCmp( cast<DIExpression>(Values[i].Expression), cast<DIExpression>(Next.Values[j].Expression)); if (res == 0) // The two expressions overlap, we can't merge. @@ -804,27 +855,27 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { /// Build the location list for all DBG_VALUEs in the function that /// describe the same variable. If the ranges of several independent -/// pieces of the same variable overlap partially, split them up and +/// fragments of the same variable overlap partially, split them up and /// combine the ranges. The resulting DebugLocEntries are will have /// strict monotonically increasing begin addresses and will never /// overlap. 
// // Input: // -// Ranges History [var, loc, piece ofs size] -// 0 | [x, (reg0, piece 0, 32)] -// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry +// Ranges History [var, loc, fragment ofs size] +// 0 | [x, (reg0, fragment 0, 32)] +// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry // 2 | | ... // 3 | [clobber reg0] -// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of +// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of // x. // // Output: // -// [0-1] [x, (reg0, piece 0, 32)] -// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)] -// [3-4] [x, (reg1, piece 32, 32)] -// [4- ] [x, (mem, piece 0, 64)] +// [0-1] [x, (reg0, fragment 0, 32)] +// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)] +// [3-4] [x, (reg1, fragment 32, 32)] +// [4- ] [x, (mem, fragment 0, 64)] void DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges) { @@ -842,11 +893,10 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, continue; } - // If this piece overlaps with any open ranges, truncate them. + // If this fragment overlaps with any open ranges, truncate them. const DIExpression *DIExpr = Begin->getDebugExpression(); - auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(), - [&](DebugLocEntry::Value R) { - return piecesOverlap(DIExpr, R.getExpression()); + auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) { + return fragmentsOverlap(DIExpr, R.getExpression()); }); OpenRanges.erase(Last, OpenRanges.end()); @@ -868,12 +918,12 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, DebugLocEntry Loc(StartLabel, EndLabel, Value); bool couldMerge = false; - // If this is a piece, it may belong to the current DebugLocEntry. - if (DIExpr->isBitPiece()) { + // If this is a fragment, it may belong to the current DebugLocEntry. + if (DIExpr->isFragment()) { // Add this value to the list of open ranges. OpenRanges.push_back(Value); - // Attempt to add the piece to the last entry. + // Attempt to add the fragment to the last entry. if (!DebugLoc.empty()) if (DebugLoc.back().MergeValues(Loc)) couldMerge = true; @@ -881,7 +931,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, if (!couldMerge) { // Need to add a new DebugLocEntry. Add all values from still - // valid non-overlapping pieces. + // valid non-overlapping fragments. if (OpenRanges.size()) Loc.addValues(OpenRanges); @@ -929,7 +979,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(Processed); + collectVariableInfoFromMFTable(Processed); for (const auto &I : DbgValues) { InlinedVariable IV = I.first; @@ -996,30 +1046,82 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); - // Check if source location changes, but ignore DBG_VALUE locations. 
- if (!MI->isDebugValue()) { - const DebugLoc &DL = MI->getDebugLoc(); - if (DL != PrevInstLoc) { - if (DL) { - unsigned Flags = 0; - PrevInstLoc = DL; - if (DL == PrologEndLoc) { - Flags |= DWARF2_FLAG_PROLOGUE_END; - PrologEndLoc = DebugLoc(); - Flags |= DWARF2_FLAG_IS_STMT; - } - if (DL.getLine() != - Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine()) - Flags |= DWARF2_FLAG_IS_STMT; - - const MDNode *Scope = DL.getScope(); - recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); - } else if (UnknownLocations) { - PrevInstLoc = DL; - recordSourceLine(0, 0, nullptr, 0); + // Check if source location changes, but ignore DBG_VALUE and CFI locations. + if (MI->isDebugValue() || MI->isCFIInstruction()) + return; + const DebugLoc &DL = MI->getDebugLoc(); + // When we emit a line-0 record, we don't update PrevInstLoc; so look at + // the last line number actually emitted, to see if it was line 0. + unsigned LastAsmLine = + Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); + + if (DL == PrevInstLoc) { + // If we have an ongoing unspecified location, nothing to do here. + if (!DL) + return; + // We have an explicit location, same as the previous location. + // But we might be coming back to it after a line 0 record. + if (LastAsmLine == 0 && DL.getLine() != 0) { + // Reinstate the source location but not marked as a statement. + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0); + } + return; + } + + if (!DL) { + // We have an unspecified location, which might want to be line 0. + // If we have already emitted a line-0 record, don't repeat it. + if (LastAsmLine == 0) + return; + // If user said Don't Do That, don't do that. + if (UnknownLocations == Disable) + return; + // See if we have a reason to emit a line-0 record now. + // Reasons to emit a line-0 record include: + // - User asked for it (UnknownLocations). + // - Instruction has a label, so it's referenced from somewhere else, + // possibly debug information; we want it to have a source location. + // - Instruction is at the top of a block; we don't want to inherit the + // location from the physically previous (maybe unrelated) block. + if (UnknownLocations == Enable || PrevLabel || + (PrevInstBB && PrevInstBB != MI->getParent())) { + // Preserve the file and column numbers, if we can, to save space in + // the encoded line table. + // Do not update PrevInstLoc, it remembers the last non-0 line. + const MDNode *Scope = nullptr; + unsigned Column = 0; + if (PrevInstLoc) { + Scope = PrevInstLoc.getScope(); + Column = PrevInstLoc.getCol(); } + recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0); } + return; + } + + // We have an explicit location, different from the previous location. + // Don't repeat a line-0 record, but otherwise emit the new location. + // (The new location might be an explicit line 0, which we do emit.) + if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0) + return; + unsigned Flags = 0; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; + PrologEndLoc = DebugLoc(); } + // If the line changed, we call that a new statement; unless we went to + // line 0 and came back, in which case it is not a new statement. + unsigned OldLine = PrevInstLoc ? 
PrevInstLoc.getLine() : LastAsmLine; + if (DL.getLine() && DL.getLine() != OldLine) + Flags |= DWARF2_FLAG_IS_STMT; + + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); + + // If we're not at line 0, remember this location. + if (DL.getLine()) + PrevInstLoc = DL; } static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { @@ -1093,18 +1195,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { "endFunction should be called with the same function as beginFunction"); const DISubprogram *SP = MF->getFunction()->getSubprogram(); - if (!MMI->hasDebugInfo() || LScopes.empty() || !SP || + if (!MMI->hasDebugInfo() || !SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) { - // If we don't have a lexical scope for this function then there will - // be a hole in the range information. Keep note of this by setting the - // previously used section to nullptr. + // If we don't have a subprogram for this function then there will be a hole + // in the range information. Keep note of this by setting the previously + // used section to nullptr. PrevCU = nullptr; CurFn = nullptr; DebugHandlerBase::endFunction(MF); - // Mark functions with no debug info on any instructions, but a - // valid DISubprogram as processed. - if (SP) - ProcessedSPNodes.insert(SP); return; } @@ -1112,7 +1210,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - SP = cast<DISubprogram>(FnScope->getScopeNode()); + assert(!FnScope || SP == FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit()); DenseSet<InlinedVariable> ProcessedVars; @@ -1154,10 +1252,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { constructAbstractSubprogramScopeDIE(AScope); } - TheCU.constructSubprogramScopeDIE(FnScope); + ProcessedSPNodes.insert(SP); + TheCU.constructSubprogramScopeDIE(SP, FnScope); if (auto *SkelCU = TheCU.getSkeleton()) - if (!LScopes.getAbstractScopesList().empty()) - SkelCU->constructSubprogramScopeDIE(FnScope); + if (!LScopes.getAbstractScopesList().empty() && + TheCU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructSubprogramScopeDIE(SP, FnScope); // Clear debug info // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the @@ -1181,7 +1281,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, Fn = Scope->getFilename(); Dir = Scope->getDirectory(); if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) - Discriminator = LBF->getDiscriminator(); + if (getDwarfVersion() >= 4) + Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) @@ -1396,9 +1497,9 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, ByteStreamer &Streamer, const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits) { - DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(), - Streamer); + DwarfExpression &DwarfExpr) { + DIExpressionCursor ExprCursor(Value.getExpression()); + DwarfExpr.addFragmentOffset(Value.getExpression()); // Regular entry. 
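The new source-line bookkeeping in DwarfDebug::beginInstruction above boils down to a small decision about when a line-0 record is worth emitting. A condensed restatement of that reasoning, for illustration only:

// Mirrors the logic above: never repeat a line-0 record, honor the
// -use-unknown-locations setting, and otherwise emit line 0 when the
// instruction is labeled or starts a new block.
enum class UnknownLoc { Default, Enable, Disable };

bool shouldEmitLineZero(UnknownLoc Option, bool LastEmittedLineWasZero,
                        bool InstrHasLabel, bool AtTopOfNewBlock) {
  if (LastEmittedLineWasZero)
    return false;                     // already covered by a line-0 record
  if (Option == UnknownLoc::Disable)
    return false;                     // user said never
  return Option == UnknownLoc::Enable // user asked for it
         || InstrHasLabel             // referenced from elsewhere, e.g. debug info
         || AtTopOfNewBlock;          // don't inherit the previous block's location
}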
if (Value.isInt()) { if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || @@ -1408,25 +1509,16 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); - const DIExpression *Expr = Value.getExpression(); - if (!Expr || !Expr->getNumElements()) - // Regular entry. - AP.EmitDwarfRegOp(Streamer, Loc); - else { - // Complex address entry. - const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); - if (Loc.getOffset()) { - DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset()); - DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), - PieceOffsetInBits); - } else - DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(), - PieceOffsetInBits); - } + const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); + if (Loc.getOffset()) + DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset()); + else + DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg()); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.AddUnsignedConstant(RawBytes); } + DwarfExpr.AddExpression(std::move(ExprCursor)); } void DebugLocEntry::finalize(const AsmPrinter &AP, @@ -1434,36 +1526,24 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, const DIBasicType *BT) { DebugLocStream::EntryBuilder Entry(List, Begin, End); BufferByteStreamer Streamer = Entry.getStreamer(); + DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer); const DebugLocEntry::Value &Value = Values[0]; - if (Value.isBitPiece()) { - // Emit all pieces that belong to the same variable and range. - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isBitPiece(); - }) && "all values are expected to be pieces"); + if (Value.isFragment()) { + // Emit all fragments that belong to the same variable and range. + assert(all_of(Values, [](DebugLocEntry::Value P) { + return P.isFragment(); + }) && "all values are expected to be fragments"); assert(std::is_sorted(Values.begin(), Values.end()) && - "pieces are expected to be sorted"); - - unsigned Offset = 0; - for (auto Piece : Values) { - const DIExpression *Expr = Piece.getExpression(); - unsigned PieceOffset = Expr->getBitPieceOffset(); - unsigned PieceSize = Expr->getBitPieceSize(); - assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); - if (Offset < PieceOffset) { - // The DWARF spec seriously mandates pieces with no locations for gaps. - DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(), - Streamer); - Expr.AddOpPiece(PieceOffset-Offset, 0); - Offset += PieceOffset-Offset; - } - Offset += PieceSize; + "fragments are expected to be sorted"); + + for (auto Fragment : Values) + emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr); - emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset); - } } else { - assert(Values.size() == 1 && "only pieces may have >1 value"); - emitDebugLocValue(AP, BT, Streamer, Value, 0); + assert(Values.size() == 1 && "only fragments may have >1 value"); + emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr); } + DwarfExpr.finalize(); } void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { @@ -1514,14 +1594,14 @@ void DwarfDebug::emitDebugLocDWO() { // rather than two. We could get fancier and try to, say, reuse an // address we know we've emitted elsewhere (the start of the function? 
// The start of the CU or CU subrange that encloses this range?) - Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); + Asm->EmitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); emitDebugLocEntryLocation(Entry); } - Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry); + Asm->EmitInt8(dwarf::DW_LLE_end_of_list); } } @@ -1807,7 +1887,7 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); NewCU.initStmtList(); @@ -1889,8 +1969,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); - TypeUnitsUnderConstruction.push_back( - std::make_pair(std::move(OwnedUnit), CTy)); + TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy); NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, CU.getLanguage()); @@ -1900,11 +1979,10 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, Ins.first->second = Signature; if (useSplitDwarf()) - NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); + NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); else { CU.applyStmtList(UnitDie); - NewTU.initSection( - Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature)); } NewTU.setType(NewTU.createTypeDIE(CTy)); @@ -1968,3 +2046,7 @@ void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { return; AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } + +uint16_t DwarfDebug::getDwarfVersion() const { + return Asm->OutStreamer->getContext().getDwarfVersion(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 6b06757..253e3f0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -22,6 +22,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DIE.h" @@ -53,7 +54,7 @@ class MachineModuleInfo; /// /// Variables can be created from allocas, in which case they're generated from /// the MMI table. Such variables can have multiple expressions and frame -/// indices. The \a Expr and \a FrameIndices array must match. +/// indices. /// /// Variables can be created from \c DBG_VALUE instructions. Those whose /// location changes over time use \a DebugLocListIndex, while those with a @@ -63,11 +64,16 @@ class MachineModuleInfo; class DbgVariable { const DILocalVariable *Var; /// Variable Descriptor. const DILocation *IA; /// Inlined at location. - SmallVector<const DIExpression *, 1> Expr; /// Complex address. DIE *TheDIE = nullptr; /// Variable DIE. unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. - SmallVector<int, 1> FrameIndex; /// Frame index. 
+ + struct FrameIndexExpr { + int FI; + const DIExpression *Expr; + }; + mutable SmallVector<FrameIndexExpr, 1> + FrameIndexExprs; /// Frame index + expression. public: /// Construct a DbgVariable. @@ -79,21 +85,18 @@ public: /// Initialize from the MMI table. void initializeMMI(const DIExpression *E, int FI) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); assert(~FI && "Expected valid index"); - Expr.push_back(E); - FrameIndex.push_back(FI); + FrameIndexExprs.push_back({FI, E}); } /// Initialize from a DBG_VALUE instruction. void initializeDbgValue(const MachineInstr *DbgValue) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert(Var == DbgValue->getDebugVariable() && "Wrong variable"); @@ -102,16 +105,15 @@ public: MInsn = DbgValue; if (auto *E = DbgValue->getDebugExpression()) if (E->getNumElements()) - Expr.push_back(E); + FrameIndexExprs.push_back({0, E}); } // Accessors. const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } - ArrayRef<const DIExpression *> getExpression() const { return Expr; } const DIExpression *getSingleExpression() const { - assert(MInsn && Expr.size() <= 1); - return Expr.size() ? Expr[0] : nullptr; + assert(MInsn && FrameIndexExprs.size() <= 1); + return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } @@ -119,7 +121,9 @@ public: unsigned getDebugLocListIndex() const { return DebugLocListIndex; } StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - ArrayRef<int> getFrameIndex() const { return FrameIndex; } + /// Get the FI entries, sorted by fragment offset. + ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; + bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } void addMMIEntry(const DbgVariable &V) { assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); @@ -127,16 +131,15 @@ public: assert(V.Var == Var && "conflicting variable"); assert(V.IA == IA && "conflicting inlined-at location"); - assert(!FrameIndex.empty() && "Expected an MMI entry"); - assert(!V.FrameIndex.empty() && "Expected an MMI entry"); - assert(Expr.size() == FrameIndex.size() && "Mismatched expressions"); - assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions"); + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); - Expr.append(V.Expr.begin(), V.Expr.end()); - FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end()); - assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) { - return E && E->isBitPiece(); - }) && "conflicting locations for variable"); + FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); + assert(all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + }) && + "conflicting locations for variable"); } // Translate tag to proper Dwarf tag. 
@@ -166,11 +169,11 @@ public: bool hasComplexAddress() const { assert(MInsn && "Expected DBG_VALUE, not MMI variable"); - assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable"); - assert( - (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) && - "Invalid Expr for DBG_VALUE"); - return !Expr.empty(); + assert((FrameIndexExprs.empty() || + (FrameIndexExprs.size() == 1 && + FrameIndexExprs[0].Expr->getNumElements())) && + "Invalid Expr for DBG_VALUE"); + return !FrameIndexExprs.empty(); } bool isBlockByrefVariable() const; const DIType *getType() const; @@ -216,7 +219,9 @@ class DwarfDebug : public DebugHandlerBase { /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. - SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; + SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>, + SmallPtrSet<const DISubprogram *, 16>> + ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn; @@ -254,9 +259,6 @@ class DwarfDebug : public DebugHandlerBase { /// Whether to emit all linkage names, or just abstract subprograms. bool UseAllLinkageNames; - /// Version of dwarf we're emitting. - unsigned DwarfVersion; - /// DWARF5 Experimental Options /// @{ bool HasDwarfAccelTables; @@ -443,9 +445,8 @@ class DwarfDebug : public DebugHandlerBase { void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges); - /// Collect variable information from the side table maintained - /// by MMI. - void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); + /// Collect variable information from the side table maintained by MF. + void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P); public: //===--------------------------------------------------------------------===// @@ -515,7 +516,7 @@ public: bool useSplitDwarf() const { return HasSplitDwarf; } /// Returns the Dwarf Version. - unsigned getDwarfVersion() const { return DwarfVersion; } + uint16_t getDwarfVersion() const; /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } @@ -537,11 +538,6 @@ public: return Ref.resolve(); } - /// Find the DwarfCompileUnit for the given CU Die. - DwarfCompileUnit *lookupUnit(const DIE *CU) const { - return CUDieMap.lookup(CU); - } - void addSubprogramNames(const DISubprogram *SP, DIE &Die); AddressPool &getAddressPool() { return AddrPool; } @@ -559,12 +555,6 @@ public: /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); - - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. - - SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { - return ProcessedSPNodes; - } }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 8287f28..80d5bd2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -28,6 +28,8 @@ protected: /// Per-function flag to indicate if frame CFI info should be emitted. bool shouldEmitCFI; + /// Per-module flag to indicate if .cfi_section has beeen emitted. 
+ bool hasEmittedCFISections; void markFunctionEnd() override; void endFragment() override; @@ -46,8 +48,6 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; - AsmPrinter::CFIMoveType moveTypeModule; - public: //===--------------------------------------------------------------------===// // Main entry points. @@ -81,7 +81,7 @@ public: ~ARMException() override; /// Emit all exception information that should come after the content. - void endModule() override; + void endModule() override {} /// Gather pre-function exception information. Assumes being emitted /// immediately after the function entry point. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 7dbc6cb..61b2c7e6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -46,7 +46,9 @@ void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) { } void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { - assert(SizeInBits > 0 && "piece has size zero"); + if (!SizeInBits) + return; + const unsigned SizeOfByte = 8; if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { EmitOp(dwarf::DW_OP_bit_piece); @@ -57,6 +59,7 @@ void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { unsigned ByteSize = SizeInBits / SizeOfByte; EmitUnsigned(ByteSize); } + this->OffsetInBits += SizeInBits; } void DwarfExpression::AddShr(unsigned ShiftBy) { @@ -82,10 +85,8 @@ bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI, return true; } -bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, - unsigned MachineReg, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) { +bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI, + unsigned MachineReg, unsigned MaxSize) { if (!TRI.isPhysicalRegister(MachineReg)) return false; @@ -94,13 +95,11 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, // If this is a valid register number, emit it. if (Reg >= 0) { AddReg(Reg); - if (PieceSizeInBits) - AddOpPiece(PieceSizeInBits, PieceOffsetInBits); return true; } // Walk up the super-register chain until we find a valid number. - // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + // For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0. for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { Reg = TRI.getDwarfRegNum(*SR, false); if (Reg >= 0) { @@ -108,27 +107,15 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned Size = TRI.getSubRegIdxSize(Idx); unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); AddReg(Reg, "super-register"); - if (PieceOffsetInBits == RegOffset) { - AddOpPiece(Size, RegOffset); - } else { - // If this is part of a variable in a sub-register at a - // non-zero offset, we need to manually shift the value into - // place, since the DW_OP_piece describes the part of the - // variable, not the position of the subregister. - if (RegOffset) - AddShr(RegOffset); - AddOpPiece(Size, PieceOffsetInBits); - } + // Use a DW_OP_bit_piece to describe the sub-register. + setSubRegisterPiece(Size, RegOffset); return true; } } // Otherwise, attempt to find a covering set of sub-register numbers. // For example, Q0 on ARM is a composition of D0+D1. 
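For the covering-set case just mentioned, the result is a composite location built from DW_OP_regx/DW_OP_piece pairs. A self-contained sketch of the bytes for Q0 on ARM, assuming the AAPCS DWARF numbering that maps D0 and D1 to 256 and 257:

#include <cstdint>
#include <vector>

// Append a ULEB128-encoded value (the operand encoding of DW_OP_regx and DW_OP_piece).
static void appendULEB(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

// Illustration only: Q0 described as two 8-byte pieces of D0 and D1.
std::vector<uint8_t> sketchQ0Location() {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x90); appendULEB(Expr, 256); // DW_OP_regx D0 (sub-register)
  Expr.push_back(0x93); appendULEB(Expr, 8);   // DW_OP_piece, 8 bytes
  Expr.push_back(0x90); appendULEB(Expr, 257); // DW_OP_regx D1
  Expr.push_back(0x93); appendULEB(Expr, 8);   // DW_OP_piece, 8 bytes
  return Expr;
}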
- // - // Keep track of the current position so we can emit the more - // efficient DW_OP_piece. - unsigned CurPos = PieceOffsetInBits; + unsigned CurPos = 0; // The size of the register in bits, assuming 8 bits per byte. unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; // Keep track of the bits in the register we already emitted, so we @@ -150,7 +137,12 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, // its range, emit a DWARF piece for it. if (Reg >= 0 && Intersection.any()) { AddReg(Reg, "sub-register"); - AddOpPiece(Size, Offset == CurPos ? 0 : Offset); + if (Offset >= MaxSize) + break; + // Emit a piece for any gap in the coverage. + if (Offset > CurPos) + AddOpPiece(Offset - CurPos); + AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset)); CurPos = Offset + Size; // Mark it as emitted. @@ -158,7 +150,7 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, } } - return CurPos > PieceOffsetInBits; + return CurPos; } void DwarfExpression::AddStackValue() { @@ -194,92 +186,114 @@ void DwarfExpression::AddUnsignedConstant(const APInt &Value) { } } -static unsigned getOffsetOrZero(unsigned OffsetInBits, - unsigned PieceOffsetInBits) { - if (OffsetInBits == PieceOffsetInBits) - return 0; - assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces"); - return OffsetInBits; -} - bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI, - const DIExpression *Expr, + DIExpressionCursor &ExprCursor, unsigned MachineReg, - unsigned PieceOffsetInBits) { - auto I = Expr->expr_op_begin(); - auto E = Expr->expr_op_end(); - if (I == E) - return AddMachineRegPiece(TRI, MachineReg); + unsigned FragmentOffsetInBits) { + if (!ExprCursor) + return AddMachineReg(TRI, MachineReg); // Pattern-match combinations for which more efficient representations exist // first. bool ValidReg = false; - switch (I->getOp()) { - case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(0); - unsigned SizeInBits = I->getArg(1); - // Piece always comes at the end of the expression. - return AddMachineRegPiece(TRI, MachineReg, SizeInBits, - getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + auto Op = ExprCursor.peek(); + switch (Op->getOp()) { + default: { + auto Fragment = ExprCursor.getFragmentInfo(); + ValidReg = AddMachineReg(TRI, MachineReg, + Fragment ? Fragment->SizeInBits : ~1U); + break; } case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: { // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset]. // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset]. - auto N = I.getNext(); - if (N != E && N->getOp() == dwarf::DW_OP_deref) { - unsigned Offset = I->getArg(0); + auto N = ExprCursor.peekNext(); + if (N && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = Op->getArg(0); ValidReg = AddMachineRegIndirect( - TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); - std::advance(I, 2); - break; + TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); + ExprCursor.consume(2); } else - ValidReg = AddMachineRegPiece(TRI, MachineReg); - } - case dwarf::DW_OP_deref: { - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - ValidReg = AddMachineRegIndirect(TRI, MachineReg); - ++I; - break; + ValidReg = AddMachineReg(TRI, MachineReg); + break; } - default: - llvm_unreachable("unsupported operand"); + case dwarf::DW_OP_deref: + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(TRI, MachineReg); + ExprCursor.take(); + break; } - if (!ValidReg) - return false; - - // Emit remaining elements of the expression. - AddExpression(I, E, PieceOffsetInBits); - return true; + return ValidReg; } -void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I, - DIExpression::expr_op_iterator E, - unsigned PieceOffsetInBits) { - for (; I != E; ++I) { - switch (I->getOp()) { - case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(0); - unsigned SizeInBits = I->getArg(1); - AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); +void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor, + unsigned FragmentOffsetInBits) { + while (ExprCursor) { + auto Op = ExprCursor.take(); + switch (Op->getOp()) { + case dwarf::DW_OP_LLVM_fragment: { + unsigned SizeInBits = Op->getArg(1); + unsigned FragmentOffset = Op->getArg(0); + // The fragment offset must have already been adjusted by emitting an + // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base + // location. + assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); + + // If \a AddMachineReg already emitted DW_OP_piece operations to represent + // a super-register by splicing together sub-registers, subtract the size + // of the pieces that was already emitted. + SizeInBits -= OffsetInBits - FragmentOffset; + + // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a + // sub-register that is smaller than the current fragment's size, use it. + if (SubRegisterSizeInBits) + SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits); + + AddOpPiece(SizeInBits, SubRegisterOffsetInBits); + setSubRegisterPiece(0, 0); break; } case dwarf::DW_OP_plus: EmitOp(dwarf::DW_OP_plus_uconst); - EmitUnsigned(I->getArg(0)); + EmitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_minus: // There is no OP_minus_uconst. EmitOp(dwarf::DW_OP_constu); - EmitUnsigned(I->getArg(0)); + EmitUnsigned(Op->getArg(0)); EmitOp(dwarf::DW_OP_minus); break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); break; + case dwarf::DW_OP_constu: + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_stack_value: + AddStackValue(); + break; default: llvm_unreachable("unhandled opcode found in expression"); } } } + +void DwarfExpression::finalize() { + if (SubRegisterSizeInBits) + AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits); +} + +void DwarfExpression::addFragmentOffset(const DIExpression *Expr) { + if (!Expr || !Expr->isFragment()) + return; + + uint64_t FragmentOffset = Expr->getFragmentInfo()->OffsetInBits; + assert(FragmentOffset >= OffsetInBits && + "overlapping or duplicate fragments"); + if (FragmentOffset > OffsetInBits) + AddOpPiece(FragmentOffset - OffsetInBits); + OffsetInBits = FragmentOffset; +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 5fff28d..fd90fa0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -25,17 +25,86 @@ class TargetRegisterInfo; class DwarfUnit; class DIELoc; +/// Holds a DIExpression and keeps track of how many operands have been consumed +/// so far. 
+class DIExpressionCursor { + DIExpression::expr_op_iterator Start, End; +public: + DIExpressionCursor(const DIExpression *Expr) { + if (!Expr) { + assert(Start == End); + return; + } + Start = Expr->expr_op_begin(); + End = Expr->expr_op_end(); + } + + DIExpressionCursor(ArrayRef<uint64_t> Expr) + : Start(Expr.begin()), End(Expr.end()) {} + + /// Consume one operation. + Optional<DIExpression::ExprOperand> take() { + if (Start == End) + return None; + return *(Start++); + } + + /// Consume N operations. + void consume(unsigned N) { std::advance(Start, N); } + + /// Return the current operation. + Optional<DIExpression::ExprOperand> peek() const { + if (Start == End) + return None; + return *(Start); + } + + /// Return the next operation. + Optional<DIExpression::ExprOperand> peekNext() const { + if (Start == End) + return None; + + auto Next = Start.getNext(); + if (Next == End) + return None; + + return *Next; + } + /// Determine whether there are any operations left in this expression. + operator bool() const { return Start != End; } + + /// Retrieve the fragment information, if any. + Optional<DIExpression::FragmentInfo> getFragmentInfo() const { + return DIExpression::getFragmentInfo(Start, End); + } +}; + /// Base class containing the logic for constructing DWARF expressions /// independently of whether they are emitted into a DIE or into a .debug_loc /// entry. class DwarfExpression { protected: - // Various convenience accessors that extract things out of AsmPrinter. unsigned DwarfVersion; + /// Current Fragment Offset in Bits. + uint64_t OffsetInBits = 0; + + /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister. + unsigned SubRegisterSizeInBits = 0; + unsigned SubRegisterOffsetInBits = 0; + + /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed + /// to represent a subregister. + void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) { + SubRegisterSizeInBits = SizeInBits; + SubRegisterOffsetInBits = OffsetInBits; + } public: DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {} - virtual ~DwarfExpression() {} + virtual ~DwarfExpression() {}; + + /// This needs to be called last to commit any pending changes. + void finalize(); /// Output a dwarf operand and an optional assembler comment. virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; @@ -52,24 +121,25 @@ public: /// Emit an (double-)indirect dwarf register operation. void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false); - /// Emit a dwarf register operation for describing - /// - a small value occupying only part of a register or - /// - a register representing only part of a value. + /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment. + /// \param OffsetInBits This is an optional offset into the location that + /// is at the top of the DWARF stack. void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0); + /// Emit a shift-right dwarf expression. void AddShr(unsigned ShiftBy); + /// Emit a DW_OP_stack_value, if supported. /// - /// The proper way to describe a constant value is - /// DW_OP_constu <const>, DW_OP_stack_value. - /// Unfortunately, DW_OP_stack_value was not available until DWARF-4, - /// so we will continue to generate DW_OP_constu <const> for DWARF-2 - /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const> - /// actually describes a value at a constant addess, not a constant value. 
- /// However, in the past there was no better way to describe a constant - /// value, so the producers and consumers started to rely on heuristics - /// to disambiguate the value vs. location status of the expression. - /// See PR21176 for more details. + /// The proper way to describe a constant value is DW_OP_constu <const>, + /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available + /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for + /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const + /// <const> actually describes a value at a constant address, not a constant + /// value. However, in the past there was no better way to describe a + /// constant value, so the producers and consumers started to rely on + /// heuristics to disambiguate the value vs. location status of the + /// expression. See PR21176 for more details. void AddStackValue(); /// Emit an indirect dwarf register operation for the given machine register. @@ -77,23 +147,23 @@ public: bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg, int Offset = 0); - /// \brief Emit a partial DWARF register operation. - /// \param MachineReg the register - /// \param PieceSizeInBits size and - /// \param PieceOffsetInBits offset of the piece in bits, if this is one - /// piece of an aggregate value. + /// Emit a partial DWARF register operation. + /// + /// \param MachineReg The register number. + /// \param MaxSize If the register must be composed from + /// sub-registers this is an upper bound + /// for how many bits the emitted DW_OP_piece + /// may cover. /// - /// If size and offset is zero an operation for the entire - /// register is emitted: Some targets do not provide a DWARF - /// register number for every register. If this is the case, this - /// function will attempt to emit a DWARF register by emitting a - /// piece of a super-register or by piecing together multiple - /// subregisters that alias the register. + /// If size and offset is zero an operation for the entire register is + /// emitted: Some targets do not provide a DWARF register number for every + /// register. If this is the case, this function will attempt to emit a DWARF + /// register by emitting a fragment of a super-register or by piecing together + /// multiple subregisters that alias the register. /// /// \return false if no DWARF register exists for MachineReg. - bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg, - unsigned PieceSizeInBits = 0, - unsigned PieceOffsetInBits = 0); + bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, + unsigned MaxSize = ~1U); /// Emit a signed constant. void AddSignedConstant(int64_t Value); @@ -102,20 +172,29 @@ public: /// Emit an unsigned constant. void AddUnsignedConstant(const APInt &Value); - /// \brief Emit an entire expression on top of a machine register location. + /// Emit a machine register location. As an optimization this may also consume + /// the prefix of a DwarfExpression if a more efficient representation for + /// combining the register location and the first operation exists. /// - /// \param PieceOffsetInBits If this is one piece out of a fragmented - /// location, this is the offset of the piece inside the entire variable. - /// \return false if no DWARF register exists for MachineReg. + /// \param FragmentOffsetInBits If this is one fragment out of a fragmented + /// location, this is the offset of the + /// fragment inside the entire variable.
+ /// \return false if no DWARF register exists + /// for MachineReg. bool AddMachineRegExpression(const TargetRegisterInfo &TRI, - const DIExpression *Expr, unsigned MachineReg, - unsigned PieceOffsetInBits = 0); - /// Emit a the operations remaining the DIExpressionIterator I. - /// \param PieceOffsetInBits If this is one piece out of a fragmented - /// location, this is the offset of the piece inside the entire variable. - void AddExpression(DIExpression::expr_op_iterator I, - DIExpression::expr_op_iterator E, - unsigned PieceOffsetInBits = 0); + DIExpressionCursor &Expr, unsigned MachineReg, + unsigned FragmentOffsetInBits = 0); + /// Emit all remaining operations in the DIExpressionCursor. + /// + /// \param FragmentOffsetInBits If this is one fragment out of multiple + /// locations, this is the offset of the + /// fragment inside the entire variable. + void AddExpression(DIExpressionCursor &&Expr, + unsigned FragmentOffsetInBits = 0); + + /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to + /// the fragment described by \c Expr. + void addFragmentOffset(const DIExpression *Expr); }; /// DwarfExpression implementation for .debug_loc entries. @@ -146,6 +225,10 @@ public: void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; + DIELoc *finalize() { + DwarfExpression::finalize(); + return &DIE; + } }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index e9fe98a..595f1d9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -19,37 +19,7 @@ namespace llvm { DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) - : Asm(AP), StrPool(DA, *Asm, Pref) {} - -DwarfFile::~DwarfFile() { - for (DIEAbbrev *Abbrev : Abbreviations) - Abbrev->~DIEAbbrev(); -} - -// Define a unique number for the abbreviation. -// -DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) { - FoldingSetNodeID ID; - DIEAbbrev Abbrev = Die.generateAbbrev(); - Abbrev.Profile(ID); - - void *InsertPos; - if (DIEAbbrev *Existing = - AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { - Die.setAbbrevNumber(Existing->getNumber()); - return *Existing; - } - - // Move the abbreviation to the heap and assign a number. - DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev)); - Abbreviations.push_back(New); - New->setNumber(Abbreviations.size()); - Die.setAbbrevNumber(Abbreviations.size()); - - // Store it for lookup. - AbbreviationsSet.InsertNode(New, InsertPos); - return *New; -} + : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {} void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) { CUs.push_back(std::move(U)); @@ -80,7 +50,7 @@ void DwarfFile::computeSizeAndOffsets() { // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. for (const auto &TheU : CUs) { - TheU->setDebugInfoOffset(SecOffset); + TheU->setDebugSectionOffset(SecOffset); SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); } } @@ -98,44 +68,10 @@ unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { // Compute the size and offset of a DIE. The offset is relative to start of the // CU. It returns the offset after laying out the DIE. unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { - // Record the abbreviation. 
- const DIEAbbrev &Abbrev = assignAbbrevNumber(Die); - - // Set DIE offset - Die.setOffset(Offset); - - // Start the size with the size of abbreviation code. - Offset += getULEB128Size(Die.getAbbrevNumber()); - - // Size the DIE attribute values. - for (const auto &V : Die.values()) - // Size attribute value. - Offset += V.SizeOf(Asm); - - // Size the DIE children if any. - if (Die.hasChildren()) { - (void)Abbrev; - assert(Abbrev.hasChildren() && "Children flag not set"); - - for (auto &Child : Die.children()) - Offset = computeSizeAndOffset(Child, Offset); - - // End of children marker. - Offset += sizeof(int8_t); - } - - Die.setSize(Offset - Die.getOffset()); - return Offset; + return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset); } -void DwarfFile::emitAbbrevs(MCSection *Section) { - // Check to see if it is worth the effort. - if (!Abbreviations.empty()) { - // Start the debug abbrev section. - Asm->OutStreamer->SwitchSection(Section); - Asm->emitDwarfAbbrevs(Abbreviations); - } -} +void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); } // Emit strings into a string section. void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index b73d89b..d4d2ed2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -16,10 +16,10 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <memory> -#include <vector> namespace llvm { class AsmPrinter; @@ -41,10 +41,7 @@ class DwarfFile { BumpPtrAllocator AbbrevAllocator; // Used to uniquely define abbreviations. - FoldingSet<DIEAbbrev> AbbreviationsSet; - - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> Abbreviations; + DIEAbbrevSet Abbrevs; // A pointer to all units in the section. SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs; @@ -65,8 +62,6 @@ class DwarfFile { public: DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); - ~DwarfFile(); - const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() { return CUs; } @@ -81,12 +76,6 @@ public: /// \returns The size of the root DIE. unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU); - /// Define a unique number for the abbreviation. - /// - /// Compute the abbreviation for \c Die, look up its unique number, and - /// return a reference to it in the uniquing table. - DIEAbbrev &assignAbbrevNumber(DIE &Die); - /// \brief Add a unit to the list of CUs. 
void addUnit(std::unique_ptr<DwarfCompileUnit> U); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 4100d72..2a866c0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -12,28 +12,33 @@ //===----------------------------------------------------------------------===// #include "DwarfUnit.h" -#include "DwarfAccelTable.h" +#include "AddressPool.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/None.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> using namespace llvm; @@ -46,18 +51,21 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU), + : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU), DIE(DIE) {} void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } + void DIEDwarfExpression::EmitSigned(int64_t Value) { DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } + void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } + bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); @@ -65,10 +73,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A), - DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { - assert(UnitTag == dwarf::DW_TAG_compile_unit || - UnitTag == dwarf::DW_TAG_type_unit); + : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node), + Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { } DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, @@ -77,7 +83,7 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU), SplitLineTable(SplitLineTable) { if (SplitLineTable) - addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); + addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); } DwarfUnit::~DwarfUnit() { @@ -194,6 +200,8 @@ void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form 
= DIEInteger::BestForm(false, Integer); + assert(Form != dwarf::DW_FORM_implicit_const && + "DW_FORM_implicit_const is used only for signed integers"); Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); } @@ -286,15 +294,15 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry) { - const DIE *DieCU = Die.getUnitOrNull(); - const DIE *EntryCU = Entry.getEntry().getUnitOrNull(); - if (!DieCU) + const DIEUnit *CU = Die.getUnit(); + const DIEUnit *EntryCU = Entry.getEntry().getUnit(); + if (!CU) // We assume that Die belongs to this CU, if it is not linked to any CU yet. - DieCU = &getUnitDie(); + CU = getUnitDie().getUnit(); if (!EntryCU) - EntryCU = &getUnitDie(); + EntryCU = getUnitDie().getUnit(); Die.addValue(DIEValueAllocator, Attribute, - EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, Entry); } @@ -365,21 +373,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); } -bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, - unsigned SizeInBits, unsigned OffsetInBits) { - DIEDwarfExpression Expr(*Asm, *this, TheDie); - Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg, - SizeInBits, OffsetInBits); - return true; -} - -bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, - int64_t Offset) { - DIEDwarfExpression Expr(*Asm, *this, TheDie); - return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), - Reg, Offset); -} - /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName either the struct, or a pointer to the struct, as @@ -472,12 +465,17 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // Decode the original location, and use that as the start of the byref // variable's location. DIELoc *Loc = new (DIEValueAllocator) DIELoc; + SmallVector<uint64_t, 6> DIExpr; + DIEDwarfExpression Expr(*Asm, *this, *Loc); bool validReg; if (Location.isReg()) - validReg = addRegisterOpPiece(*Loc, Location.getReg()); + validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg()); else - validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + validReg = + Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg(), Location.getOffset()); if (!validReg) return; @@ -485,27 +483,29 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + DIExpr.push_back(dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); + DIExpr.push_back(dwarf::DW_OP_plus); + DIExpr.push_back(forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. 
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + DIExpr.push_back(dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset); + DIExpr.push_back(dwarf::DW_OP_plus); + DIExpr.push_back(varFieldOffset); } + Expr.AddExpression(makeArrayRef(DIExpr)); + Expr.finalize(); // Now attach the location information to the DIE. addBlock(Die, Attribute, Loc); @@ -538,7 +538,7 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type); + T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); DITypeRef Deriv = DTy->getBaseType(); assert(Deriv && "Expected valid base type"); return isUnsignedDIType(DD, DD->resolve(Deriv)); @@ -699,6 +699,10 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + // DW_TAG_atomic_type is not supported in DWARF < 5 + if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5) + return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. auto *Context = resolve(Ty->getScope()); @@ -735,7 +739,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { void DwarfUnit::updateAcceleratorTables(const DIScope *Context, const DIType *Ty, const DIE &TyDIE) { if (!Ty->getName().empty() && !Ty->isForwardDecl()) { - bool IsImplementation = 0; + bool IsImplementation = false; if (auto *CT = dyn_cast<DICompositeType>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. @@ -999,6 +1003,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (RLang) addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); + + // Add align info if available. + if (uint32_t AlignInBytes = CTy->getAlignInBytes()) + addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); } } @@ -1066,6 +1075,8 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { DD->addAccelNamespace(Name, NDie); addGlobalName(Name, NDie, NS->getScope()); addSourceLine(NDie, NS); + if (NS->getExportSymbols()) + addFlag(NDie, dwarf::DW_AT_export_symbols); return &NDie; } @@ -1133,7 +1144,9 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " "getOrCreateSubprogramDIE"); - DeclLinkageName = SPDecl->getLinkageName(); + // Look at the Decl's linkage name only if we emitted it. 
+ if (DD->useAllLinkageNames()) + DeclLinkageName = SPDecl->getLinkageName(); unsigned DeclID = getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); @@ -1248,6 +1261,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isRValueReference()) addFlag(SPDie, dwarf::DW_AT_rvalue_reference); + if (SP->isNoReturn()) + addFlag(SPDie, dwarf::DW_AT_noreturn); + if (SP->isProtected()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); @@ -1260,6 +1276,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); + + if (SP->isMainSubprogram()) + addFlag(SPDie, dwarf::DW_AT_main_subprogram); } void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, @@ -1288,7 +1307,7 @@ DIE *DwarfUnit::getIndexTyDie() { if (IndexTyDie) return IndexTyDie; // Construct an integer type to use for indexes. - IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); + IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, @@ -1383,6 +1402,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { } else { uint64_t Size = DT->getSizeInBits(); uint64_t FieldSize = DD->getBaseTypeSize(DT); + uint32_t AlignInBytes = DT->getAlignInBytes(); uint64_t OffsetInBytes; bool IsBitfield = FieldSize && Size != FieldSize; @@ -1393,8 +1413,11 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); uint64_t Offset = DT->getOffsetInBits(); - uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize; - uint64_t AlignMask = ~(Align - 1); + // We can't use DT->getAlignInBits() here: AlignInBits for member type + // is non-zero if and only if alignment was forced (e.g. _Alignas()), + // which can't be done with bitfields. Thus we use FieldSize here. + uint32_t AlignInBits = FieldSize; + uint32_t AlignMask = ~(AlignInBits - 1); // The bits from the start of the storage unit to the start of the field. uint64_t StartBitOffset = Offset - (Offset & AlignMask); // The byte offset of the field's aligned storage unit inside the struct. @@ -1417,6 +1440,9 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { } else { // This is not a bitfield. 
OffsetInBytes = DT->getOffsetInBits() / 8; + if (AlignInBytes) + addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); } if (DD->getDwarfVersion() <= 2) { @@ -1493,13 +1519,17 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) addConstantFPValue(StaticMemberDIE, CFP); + if (uint32_t AlignInBytes = DT->getAlignInBytes()) + addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + return &StaticMemberDIE; } void DwarfUnit::emitHeader(bool UseOffsets) { // Emit size of content not including length itself Asm->OutStreamer->AddComment("Length of Unit"); - Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); + Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize()); Asm->OutStreamer->AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); @@ -1519,11 +1549,6 @@ void DwarfUnit::emitHeader(bool UseOffsets) { Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } -void DwarfUnit::initSection(MCSection *Section) { - assert(!this->Section); - this->Section = Section; -} - void DwarfTypeUnit::emitHeader(bool UseOffsets) { DwarfUnit::emitHeader(UseOffsets); Asm->OutStreamer->AddComment("Type Signature"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index e225f92..8654d6f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -65,7 +65,7 @@ public: //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a /// source file. -class DwarfUnit { + class DwarfUnit : public DIEUnit { protected: /// MDNode for the compile unit. const DICompileUnit *CUNode; @@ -73,9 +73,6 @@ protected: // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - /// Unit debug information entry. - DIE &UnitDie; - /// Target of Dwarf emission. AsmPrinter *Asm; @@ -83,7 +80,7 @@ protected: DwarfDebug *DD; DwarfFile *DU; - /// An anonymous type for index type. Owned by UnitDie. + /// An anonymous type for index type. Owned by DIEUnit. DIE *IndexTyDie; /// Tracks the mapping of unit level debug information variables to debug @@ -101,9 +98,6 @@ protected: /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const DINode *> ContainingTypeMap; - /// The section this unit will be emitted in. - MCSection *Section; - DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); @@ -112,21 +106,13 @@ protected: public: virtual ~DwarfUnit(); - void initSection(MCSection *Section); - - MCSection *getSection() const { - assert(Section); - return Section; - } - // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } const DICompileUnit *getCUNode() const { return CUNode; } - DIE &getUnitDie() { return UnitDie; } /// Return true if this compile unit has something to write out. - bool hasContent() const { return UnitDie.hasChildren(); } + bool hasContent() const { return getUnitDie().hasChildren(); } /// Get string containing language specific context for a global name. /// @@ -249,17 +235,6 @@ public: /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); - /// Add register operand. 
- /// \returns false if the register does not exist, e.g., because it was never - /// materialized. - bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, - unsigned SizeInBits = 0, unsigned OffsetInBits = 0); - - /// Add register offset. - /// \returns false if the register does not exist, e.g., because it was never - /// materialized. - bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); - // FIXME: Should be reformulated in terms of addComplexAddress. /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual Block variable (navigating diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e24dcb1..0a4a7a0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -74,7 +74,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, // output using a fixed width encoding. FilterOffsets[i] holds the byte // offset corresponding to FilterIds[i]. - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<unsigned> &FilterIds = Asm->MF->getFilterIds(); SmallVector<int, 16> FilterOffsets; FilterOffsets.reserve(FilterIds.size()); int Offset = -1; @@ -296,7 +296,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, else { // SjLj EH must maintain the call sites in the order assigned // to them by the SjLjPrepare pass. - unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel); + unsigned SiteNo = Asm->MF->getCallSiteBeginLabel(BeginLabel); if (CallSites.size() < SiteNo) CallSites.resize(SiteNo); CallSites[SiteNo - 1] = Site; @@ -336,9 +336,10 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. void EHStreamer::emitExceptionTable() { - const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MF->getLandingPads(); // Sort the landing pads in order of their type ids. This is used to fold // duplicate actions. 
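The common thread in the exception-handling hunks above and below is that per-function EH tables (landing pads, typeinfos, filter ids) are now read from the MachineFunction rather than from MachineModuleInfo. The following caller-side sketch only illustrates that pattern; it is not part of the patch, the helper name readEHTables is hypothetical, and the only accessors assumed are the ones that appear in the hunks (getLandingPads, getTypeInfos, getFilterIds).

#include "llvm/CodeGen/MachineFunction.h"

// Sketch only: after this change the per-function EH tables hang off the
// MachineFunction; previously they were fetched through MachineModuleInfo.
static void readEHTables(const llvm::MachineFunction &MF) {
  const auto &Pads = MF.getLandingPads();    // std::vector<LandingPadInfo>
  const auto &TypeInfos = MF.getTypeInfos(); // std::vector<const GlobalValue *>
  const auto &FilterIds = MF.getFilterIds(); // std::vector<unsigned>
  (void)Pads; (void)TypeInfos; (void)FilterIds; // consumed by the EH emitters
}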
@@ -649,8 +650,9 @@ void EHStreamer::emitExceptionTable() { } void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index c09ef6a..8baee4d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -50,7 +50,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { std::string SymName; SymName += "caml"; size_t Letter = SymName.size(); - SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); + SymName.append(MId.begin(), find(MId, '.')); SymName += "__"; SymName += Id; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index e5933d8..9d7c96a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -63,8 +63,8 @@ void WinException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - bool hasLandingPads = !MMI->getLandingPads().empty(); - bool hasEHFunclets = MMI->hasEHFunclets(); + bool hasLandingPads = !MF->getLandingPads().empty(); + bool hasEHFunclets = MF->hasEHFunclets(); const Function *F = MF->getFunction(); @@ -72,17 +72,21 @@ void WinException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = nullptr; - if (F->hasPersonalityFn()) - Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - bool forceEmitPersonality = - F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && - F->needsUnwindTableEntry(); + EHPersonality Per = EHPersonality::Unknown; + const Function *PerFn = nullptr; + if (F->hasPersonalityFn()) { + PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + Per = classifyEHPersonality(PerFn); + } + + bool forceEmitPersonality = F->hasPersonalityFn() && + !isNoOpWithoutInvoke(Per) && + F->needsUnwindTableEntry(); shouldEmitPersonality = forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && - PerEncoding != dwarf::DW_EH_PE_omit && Per); + PerEncoding != dwarf::DW_EH_PE_omit && PerFn); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && @@ -90,7 +94,16 @@ void WinException::beginFunction(const MachineFunction *MF) { // If we're not using CFI, we don't want the CFI or the personality, but we // might want EH tables if we had EH pads. - if (!Asm->MAI->usesWindowsCFI()) { + if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) { + if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) { + // If this is 32-bit SEH and we don't have any funclets (really invokes), + // make sure we emit the parent offset label. Some unreferenced filter + // functions may still refer to it. 
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + StringRef FLinkageName = + GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); + } shouldEmitLSDA = hasEHFunclets; shouldEmitPersonality = false; return; @@ -108,18 +121,20 @@ void WinException::endFunction(const MachineFunction *MF) { const Function *F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()); + Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); // Get rid of any dead landing pads if we're not using funclets. In funclet // schemes, the landing pad is not actually reachable. It only exists so // that we can emit the right table data. - if (!isFuncletEHPersonality(Per)) - MMI->TidyLandingPads(); + if (!isFuncletEHPersonality(Per)) { + MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF); + NonConstMF->tidyLandingPads(); + } endFunclet(); // endFunclet will emit the necessary .xdata tables for x64 SEH. - if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets()) + if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets()) return; if (shouldEmitPersonality || shouldEmitLSDA) { @@ -147,7 +162,7 @@ void WinException::endFunction(const MachineFunction *MF) { } } -/// Retreive the MCSymbol for a GlobalValue or MachineBasicBlock. +/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock. static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, const MachineBasicBlock *MBB) { if (!MBB) @@ -193,8 +208,10 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, } // Mark 'Sym' as starting our funclet. - if (shouldEmitMoves || shouldEmitPersonality) + if (shouldEmitMoves || shouldEmitPersonality) { + CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly(); Asm->OutStreamer->EmitWinCFIStartProc(Sym); + } if (shouldEmitPersonality) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -204,16 +221,14 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, if (F->hasPersonalityFn()) PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); const MCSymbol *PersHandlerSym = - TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI); - - // Classify the personality routine so that we may reason about it. - EHPersonality Per = EHPersonality::Unknown; - if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()); - - // Do not emit a .seh_handler directive if it is a C++ cleanup funclet. - if (Per != EHPersonality::MSVC_CXX || - !CurrentFuncletEntry->isCleanupFuncletEntry()) + TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI); + + // Do not emit a .seh_handler directives for cleanup funclets. + // FIXME: This means cleanup funclets cannot handle exceptions. Given that + // Clang doesn't produce EH constructs inside cleanup funclets and LLVM's + // inliner doesn't allow inlining them, this isn't a major problem in + // practice. 
+ if (!CurrentFuncletEntry->isCleanupFuncletEntry()) Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); } } @@ -223,15 +238,12 @@ void WinException::endFunclet() { if (!CurrentFuncletEntry) return; + const MachineFunction *MF = Asm->MF; if (shouldEmitMoves || shouldEmitPersonality) { - const Function *F = Asm->MF->getFunction(); + const Function *F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()); - - // The .seh_handlerdata directive implicitly switches section, push the - // current section so that we may return to it. - Asm->OutStreamer->PushSection(); + Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); @@ -244,18 +256,17 @@ void WinException::endFunclet() { MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); - } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() && + } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { // If this is the parent function in Win64 SEH, emit the LSDA immediately // following .seh_handlerdata. - emitCSpecificHandlerTable(Asm->MF); + emitCSpecificHandlerTable(MF); } - // Switch back to the previous section now that we are done writing to - // .xdata. - Asm->OutStreamer->PopSection(); - - // Emit a .seh_endproc directive to mark the end of the function. + // Switch back to the funclet start .text section now that we are done + // writing to .xdata, and emit an .seh_endproc directive to mark the end of + // the function. + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); Asm->OutStreamer->EmitWinCFIEndProc(); } @@ -905,15 +916,24 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // registration in order to recover the parent frame pointer. Now that we know // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. + + // Compute the parent frame offset. The EHRegNodeFrameIndex will be invalid if + // after optimization all the invokes were eliminated. We still need to emit + // the parent frame offset label, but it should be garbage and should never be + // used. + int64_t Offset = 0; + int FI = FuncInfo.EHRegNodeFrameIndex; + if (FI != INT_MAX) { + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg); + } + MCContext &Ctx = Asm->OutContext; MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); - unsigned UnusedReg; - const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - int64_t Offset = TFI->getFrameIndexReference( - *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg); - const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, + MCConstantExpr::create(Offset, Ctx)); } /// Emit the language-specific data that _except_handler3 and 4 expect. This is @@ -966,11 +986,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // Retrieve the Guard Stack slot. 
int GSCookieOffset = -2; - const MachineFrameInfo *MFI = MF->getFrameInfo(); - if (MFI->hasStackProtectorIndex()) { + const MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.hasStackProtectorIndex()) { unsigned UnusedReg; const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); - int SSPIdx = MFI->getStackProtectorIndex(); + int SSPIdx = MFI.getStackProtectorIndex(); GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h index acb3010..371061c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -21,6 +21,7 @@ class Function; class GlobalValue; class MachineFunction; class MCExpr; +class MCSection; class Value; struct WinEHFuncInfo; @@ -40,6 +41,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// Pointer to the current funclet entry BB. const MachineBasicBlock *CurrentFuncletEntry = nullptr; + /// The section of the last funclet start. + MCSection *CurrentFuncletTextSection = nullptr; + void emitCSpecificHandlerTable(const MachineFunction *MF); void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 5dacbf9..6fba161 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -110,9 +110,12 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, - const MachineBranchProbabilityInfo &ProbInfo) - : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), - MBPI(ProbInfo) { + const MachineBranchProbabilityInfo &ProbInfo, + unsigned MinTailLength) + : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), + MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { + if (MinCommonTailLength == 0) + MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; @@ -141,59 +144,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { MLI->removeBlock(MBB); } -/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def -/// followed by terminators, and if the implicitly defined registers are not -/// used by the terminators, remove those implicit_def's. e.g. -/// BB1: -/// r0 = implicit_def -/// r1 = implicit_def -/// br -/// This block can be optimized away later if the implicit instructions are -/// removed. -bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { - SmallSet<unsigned, 4> ImpDefRegs; - MachineBasicBlock::iterator I = MBB->begin(); - while (I != MBB->end()) { - if (!I->isImplicitDef()) - break; - unsigned Reg = I->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); - } else { - ImpDefRegs.insert(Reg); - } - ++I; - } - if (ImpDefRegs.empty()) - return false; - - MachineBasicBlock::iterator FirstTerm = I; - while (I != MBB->end()) { - if (!TII->isUnpredicatedTerminator(*I)) - return false; - // See if it uses any of the implicitly defined registers. 
- for (const MachineOperand &MO : I->operands()) { - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned Reg = MO.getReg(); - if (ImpDefRegs.count(Reg)) - return false; - } - ++I; - } - - I = MBB->begin(); - while (I != FirstTerm) { - MachineInstr *ImpDefMI = &*I; - ++I; - MBB->erase(ImpDefMI); - } - - return true; -} - /// OptimizeFunction - Perhaps branch folding, tail merging and other /// CFG optimizations on the given function. Block placement changes the layout /// and may create new tail merging opportunities. @@ -224,7 +174,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, SmallVector<MachineOperand, 4> Cond; if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true)) MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - MadeChange |= OptimizeImpDefsBlock(&MBB); } // Recalculate funclet membership. @@ -399,37 +348,16 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } -void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) { - if (!UpdateLiveIns) - return; - - LiveRegs.init(TRI); - LiveRegs.addLiveOutsNoPristines(MBB); - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) - LiveRegs.stepBackward(MI); - - for (unsigned Reg : LiveRegs) { - // Skip the register if we are about to add one of its super registers. - bool ContainsSuperReg = false; - for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) { - if (LiveRegs.contains(*SReg)) { - ContainsSuperReg = true; - break; - } - } - if (ContainsSuperReg) - continue; - MBB.addLiveIn(Reg); - } -} - /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { TII->ReplaceTailWithBranchTo(OldInst, NewDest); - computeLiveIns(*NewDest); + if (UpdateLiveIns) { + NewDest->clearLiveIns(); + computeLiveIns(LiveRegs, *TRI, *NewDest); + } ++NumTailMerge; } @@ -467,7 +395,8 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // NewMBB inherits CurMBB's block frequency. MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); - computeLiveIns(*NewMBB); + if (UpdateLiveIns) + computeLiveIns(LiveRegs, *TRI, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -511,14 +440,14 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { - if (!TII->ReverseBranchCondition(Cond)) { - TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); + if (!TII->reverseBranchCondition(Cond)) { + TII->removeBranch(*CurMBB); + TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, nullptr, + TII->insertBranch(*CurMBB, SuccBB, nullptr, SmallVector<MachineOperand, 0>(), dl); } @@ -591,13 +520,26 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction /// in each block. +/// MBB1, MBB2 The blocks to check +/// MinCommonTailLength Minimum size of tail block to be merged. 
+/// CommonTailLen Out parameter to record the size of the shared tail between +/// MBB1 and MBB2 +/// I1, I2 Iterator references that will be changed to point to the first +/// instruction in the common tail shared by MBB1,MBB2 +/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form +/// relative to SuccBB +/// PredBB The layout predecessor of SuccBB, if any. +/// FuncletMembership map from block to funclet #. +/// AfterPlacement True if we are merging blocks after layout. Stricter +/// thresholds apply to prevent undoing tail-duplication. static bool ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, - unsigned minCommonTailLength, unsigned &CommonTailLen, + unsigned MinCommonTailLength, unsigned &CommonTailLen, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, - DenseMap<const MachineBasicBlock *, int> &FuncletMembership) { + DenseMap<const MachineBasicBlock *, int> &FuncletMembership, + bool AfterPlacement) { // It is never profitable to tail-merge blocks from two different funclets. if (!FuncletMembership.empty()) { auto Funclet1 = FuncletMembership.find(MBB1); @@ -617,7 +559,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. - if (MBB1 == PredBB || MBB2 == PredBB) { + // This is true only for a single successor. For multiple successors, we are + // trading a conditional branch for an unconditional one. + // TODO: Re-visit successor size for non-layout tail merging. + if ((MBB1 == PredBB || MBB2 == PredBB) && + (!AfterPlacement || MBB1->succ_size() == 1)) { MachineBasicBlock::iterator I; unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); if (CommonTailLen > NumTerms) @@ -635,15 +581,18 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // If both blocks have an unconditional branch temporarily stripped out, // count that as an additional common instruction for the following - // heuristics. + // heuristics. This heuristic is only accurate for single-succ blocks, so to + // make sure that during layout merging and duplicating don't crash, we check + // for that when merging during layout. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + (MBB1->succ_size() == 1 || !AfterPlacement) && !MBB1->back().isBarrier() && !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. - if (EffectiveTailLen >= minCommonTailLength) + if (EffectiveTailLen >= MinCommonTailLength) return true; // If we are optimizing for code size, 2 instructions in common is enough if @@ -666,7 +615,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, /// those blocks appear in MergePotentials (where they are not necessarily /// consecutive). 
unsigned BranchFolder::ComputeSameTails(unsigned CurHash, - unsigned minCommonTailLength, + unsigned MinCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { unsigned maxCommonTailLength = 0U; @@ -679,10 +628,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) { unsigned CommonTailLen; if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), - minCommonTailLength, + MinCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, - FuncletMembership)) { + FuncletMembership, + AfterBlockPlacement)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -749,8 +699,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SameTails[commonTailIndex].getTailStartPos(); MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - // If the common tail includes any debug info we will take it pretty - // randomly from one of the inputs. Might be better to remove it? DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); @@ -832,14 +780,13 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, // branch to Succ added (but the predecessor/successor lists need no // adjustment). The lone predecessor of Succ that falls through into Succ, // if any, is given in PredBB. +// MinCommonTailLength - Except for the special cases below, tail-merge if +// there are at least this many instructions in common. bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB) { + MachineBasicBlock *PredBB, + unsigned MinCommonTailLength) { bool MadeChange = false; - // Except for the special cases below, tail-merge if there are at least - // this many instructions in common. - unsigned minCommonTailLength = TailMergeSize; - DEBUG(dbgs() << "\nTryTailMergeBlocks: "; for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() @@ -852,8 +799,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, << PredBB->getNumber() << "\n"; } dbgs() << "Looking for common tails of at least " - << minCommonTailLength << " instruction" - << (minCommonTailLength == 1 ? "" : "s") << '\n'; + << MinCommonTailLength << " instruction" + << (MinCommonTailLength == 1 ? "" : "s") << '\n'; ); // Sort by hash value so that blocks with identical end sequences sort @@ -867,10 +814,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. unsigned maxCommonTailLength = ComputeSameTails(CurHash, - minCommonTailLength, + MinCommonTailLength, SuccBB, PredBB); - // If we didn't find any pair that has at least minCommonTailLength + // If we didn't find any pair that has at least MinCommonTailLength // instructions in common, remove all blocks with this hash code and retry. if (SameTails.empty()) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); @@ -928,6 +875,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Recompute common tail MBB's edge weights and block frequency. setCommonTailEdgeWeights(*MBB); + // Remove the original debug location from the common tail. + for (auto &MI : *MBB) + if (!MI.isDebugValue()) + MI.setDebugLoc(DebugLoc()); + // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. 
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() @@ -976,7 +928,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // See if we can do any tail merging on those. if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength); } // Look at blocks (IBB) with multiple predecessors (PBB). @@ -1056,7 +1008,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // branch. SmallVector<MachineOperand, 4> NewCond(Cond); if (!Cond.empty() && TBB == IBB) { - if (TII->ReverseBranchCondition(NewCond)) + if (TII->reverseBranchCondition(NewCond)) continue; // This is the QBB case described above if (!FBB) { @@ -1092,10 +1044,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*PBB); + TII->removeBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, + TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, NewCond, dl); } @@ -1110,7 +1062,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TriedMerging.insert(MergePotentials[i].getBlock()); if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(IBB, PredBB); + MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength); // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks. @@ -1311,10 +1263,10 @@ ReoptimizeBlock: // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); + TII->removeBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1359,7 +1311,7 @@ ReoptimizeBlock: // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. if (PriorTBB == MBB && !PriorFBB) { - TII->RemoveBranch(PrevBB); + TII->removeBranch(PrevBB); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1369,8 +1321,8 @@ ReoptimizeBlock: // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1381,10 +1333,10 @@ ReoptimizeBlock: // fall-through. if (PriorTBB == MBB) { SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); - if (!TII->ReverseBranchCondition(NewPriorCond)) { + if (!TII->reverseBranchCondition(NewPriorCond)) { DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1416,13 +1368,13 @@ ReoptimizeBlock: if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. 
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); - if (!TII->ReverseBranchCondition(NewPriorCond)) { + if (!TII->reverseBranchCondition(NewPriorCond)) { DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(&MF.back()); @@ -1450,10 +1402,10 @@ ReoptimizeBlock: // Loop: xxx; jncc Loop; jmp Out if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector<MachineOperand, 4> NewCond(CurCond); - if (!TII->ReverseBranchCondition(NewCond)) { + if (!TII->reverseBranchCondition(NewCond)) { DebugLoc dl = getBranchDebugLoc(*MBB); - TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + TII->removeBranch(*MBB); + TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1469,7 +1421,7 @@ ReoptimizeBlock: // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. - TII->RemoveBranch(*MBB); + TII->removeBranch(*MBB); // If the only things remaining in the block are debug info, remove these // as well, so this will behave the same as an empty block in non-debug // mode. @@ -1500,8 +1452,8 @@ ReoptimizeBlock: PriorFBB = MBB; } DebugLoc pdl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1526,9 +1478,9 @@ ReoptimizeBlock: *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { DebugLoc pdl = getBranchDebugLoc(*PMBB); - TII->RemoveBranch(*PMBB); + TII->removeBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); + TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); @@ -1548,7 +1500,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. - TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl); + TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl); } } @@ -1585,7 +1537,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); + TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1615,18 +1567,22 @@ ReoptimizeBlock: // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were - // removed, move this block to the end of the function. + // removed, move this block to the end of the function. There is no real + // advantage in "falling through" to an EH block, so we don't want to + // perform this transformation for that case. + // + // Also, Windows EH introduced the possibility of an arbitrary number of + // successors to a given block. 
The analyzeBranch call does not consider + // exception handling and so we can get in a state where a block + // containing a call is followed by multiple EH blocks that would be + // rotated infinitely at the end of the function if the transformation + // below were performed for EH "FallThrough" blocks. Therefore, even if + // that appears not to be happening anymore, we should assume that it is + // possible and not remove the "!FallThrough()->isEHPad" condition below. MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector<MachineOperand, 4> PrevCond; - // We're looking for cases where PrevBB could possibly fall through to - // FallThrough, but if FallThrough is an EH pad that wouldn't be useful - // so here we skip over any EH pads so we might have a chance to find - // a branch target from PrevBB. - while (FallThrough != MF.end() && FallThrough->isEHPad()) - ++FallThrough; - // Now check to see if the current block is sitting between PrevBB and - // a block to which it could fall through. if (FallThrough != MF.end() && + !FallThrough->isEHPad() && !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(&*FallThrough)) { MBB->moveAfter(&MF.back()); @@ -1720,10 +1676,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // The terminator is probably a conditional branch, try not to separate the // branch from condition setting instruction. - MachineBasicBlock::iterator PI = Loc; - --PI; - while (PI != MBB->begin() && PI->isDebugValue()) - --PI; + MachineBasicBlock::iterator PI = + skipDebugInstructionsBackward(std::prev(Loc), MBB->begin()); bool IsDef = false; for (const MachineOperand &MO : PI->operands()) { @@ -1817,18 +1771,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator FIE = FBB->end(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TIB->isDebugValue()) { - while (TIB != TIE && TIB->isDebugValue()) - ++TIB; - if (TIB == TIE) - break; - } - if (FIB->isDebugValue()) { - while (FIB != FIE && FIB->isDebugValue()) - ++FIB; - if (FIB == FIE) - break; - } + TIB = skipDebugInstructionsForward(TIB, TIE); + FIB = skipDebugInstructionsForward(FIB, FIE); + if (TIB == TIE || FIB == FIE) + break; + if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead)) break; @@ -1929,14 +1876,21 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { FBB->erase(FBB->begin(), FIB); // Update livein's. + bool AddedLiveIns = false; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Def = LocalDefs[i]; if (LocalDefsSet.count(Def)) { TBB->addLiveIn(Def); FBB->addLiveIn(Def); + AddedLiveIns = true; } } + if (AddedLiveIns) { + TBB->sortUniqueLiveIns(); + FBB->sortUniqueLiveIns(); + } + ++NumHoist; return true; } diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index 36a5a2e..fc48e48 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -29,9 +29,13 @@ namespace llvm { public: class MBFIWrapper; - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + explicit BranchFolder(bool defaultEnableTailMerge, + bool CommonHoist, MBFIWrapper &MBFI, - const MachineBranchProbabilityInfo &MBPI); + const MachineBranchProbabilityInfo &MBPI, + // Min tail length to merge. Defaults to commandline + // flag. Ignored for optsize. 
+ unsigned MinCommonTailLength = 0); bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, MachineModuleInfo *mmi, @@ -99,6 +103,7 @@ namespace llvm { bool EnableTailMerge; bool EnableHoistCommonCode; bool UpdateLiveIns; + unsigned MinCommonTailLength; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; @@ -129,9 +134,9 @@ namespace llvm { bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); + MachineBasicBlock* PredBB, + unsigned MinCommonTailLength); void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); - void computeLiveIns(MachineBasicBlock &MBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, @@ -150,7 +155,6 @@ namespace llvm { bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); - bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); bool HoistCommonCode(MachineFunction &MF); bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp new file mode 100644 index 0000000..8b27570 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -0,0 +1,510 @@ +//===-- BranchRelaxation.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "branch-relaxation" + +STATISTIC(NumSplit, "Number of basic blocks split"); +STATISTIC(NumConditionalRelaxed, "Number of conditional branches relaxed"); +STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed"); + +#define BRANCH_RELAX_NAME "Branch relaxation pass" + +namespace { +class BranchRelaxation : public MachineFunctionPass { + /// BasicBlockInfo - Information about the offset and size of a single + /// basic block. + struct BasicBlockInfo { + /// Offset - Distance from the beginning of the function to the beginning + /// of this basic block. + /// + /// The offset is always aligned as required by the basic block. + unsigned Offset; + + /// Size - Size of the basic block in bytes. If the block contains + /// inline assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + BasicBlockInfo() : Offset(0), Size(0) {} + + /// Compute the offset immediately following this block. \p MBB is the next + /// block. 
+ unsigned postOffset(const MachineBasicBlock &MBB) const { + unsigned PO = Offset + Size; + unsigned Align = MBB.getAlignment(); + if (Align == 0) + return PO; + + unsigned AlignAmt = 1 << Align; + unsigned ParentAlign = MBB.getParent()->getAlignment(); + if (Align <= ParentAlign) + return PO + OffsetToAlignment(PO, AlignAmt); + + // The alignment of this MBB is larger than the function's alignment, so we + // can't tell whether or not it will insert nops. Assume that it will. + return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt); + } + }; + + SmallVector<BasicBlockInfo, 16> BlockInfo; + std::unique_ptr<RegScavenger> RS; + LivePhysRegs LiveRegs; + + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + bool relaxBranchInstructions(); + void scanFunction(); + + MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB); + + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB); + void adjustBlockOffsets(MachineBasicBlock &MBB); + bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const; + + bool fixupConditionalBranch(MachineInstr &MI); + bool fixupUnconditionalBranch(MachineInstr &MI); + uint64_t computeBlockSize(const MachineBasicBlock &MBB) const; + unsigned getInstrOffset(const MachineInstr &MI) const; + void dumpBBs(); + void verify(); + +public: + static char ID; + BranchRelaxation() : MachineFunctionPass(ID) { } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return BRANCH_RELAX_NAME; + } +}; + +} + +char BranchRelaxation::ID = 0; +char &llvm::BranchRelaxationPassID = BranchRelaxation::ID; + +INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false) + +/// verify - check BBOffsets, BBSizes, alignment of islands +void BranchRelaxation::verify() { +#ifndef NDEBUG + unsigned PrevNum = MF->begin()->getNumber(); + for (MachineBasicBlock &MBB : *MF) { + unsigned Align = MBB.getAlignment(); + unsigned Num = MBB.getNumber(); + assert(BlockInfo[Num].Offset % (1u << Align) == 0); + assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); + assert(BlockInfo[Num].Size == computeBlockSize(MBB)); + PrevNum = Num; + } +#endif +} + +/// print block size and offset information - debugging +void BranchRelaxation::dumpBBs() { + for (auto &MBB : *MF) { + const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; + dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + << format("size=%#x\n", BBI.Size); + } +} + +/// scanFunction - Do the initial scan of the function, building up +/// information about each block. +void BranchRelaxation::scanFunction() { + BlockInfo.clear(); + BlockInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineBasicBlock &MBB : *MF) + BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB); + + // Compute block offsets and known bits. + adjustBlockOffsets(*MF->begin()); +} + +/// computeBlockSize - Compute the size for MBB. 
+uint64_t BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) const { + uint64_t Size = 0; + for (const MachineInstr &MI : MBB) + Size += TII->getInstSizeInBytes(MI); + return Size; +} + +/// getInstrOffset - Return the current offset of the specified machine +/// instruction from the start of the function. This offset changes as stuff is +/// moved around inside the function. +unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const { + const MachineBasicBlock *MBB = MI.getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BlockInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + + return Offset; +} + +void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { + unsigned PrevNum = Start.getNumber(); + for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) { + unsigned Num = MBB.getNumber(); + if (!Num) // block zero is never changed from offset zero. + continue; + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB); + + PrevNum = Num; + } +} + + /// Insert a new empty basic block and insert it after \BB +MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) { + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(BB.getBasicBlock()); + MF->insert(++BB.getIterator(), NewBB); + + // Insert an entry into BlockInfo to align it properly with the block numbers. + BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + return NewBB; +} + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB) { + MachineBasicBlock *OrigBB = MI.getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MF->insert(++OrigBB->getIterator(), NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + TII->insertUnconditionalBranch(*OrigBB, NewBB, DebugLoc()); + + // Insert an entry into BlockInfo to align it properly with the block numbers. + BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + + NewBB->transferSuccessors(OrigBB); + OrigBB->addSuccessor(NewBB); + OrigBB->addSuccessor(DestBB); + + // Cleanup potential unconditional branch to successor block. + // Note that updateTerminator may change the size of the blocks. + NewBB->updateTerminator(); + OrigBB->updateTerminator(); + + // Figure out how large the OrigBB is. 
As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + BlockInfo[OrigBB->getNumber()].Size = computeBlockSize(*OrigBB); + + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBlockOffsets(*OrigBB); + + // Need to fix live-in lists if we track liveness. + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeLiveIns(LiveRegs, *TRI, *NewBB); + + ++NumSplit; + + return NewBB; +} + +/// isBlockInRange - Returns true if the distance between specific MI and +/// specific BB can fit in MI's displacement field. +bool BranchRelaxation::isBlockInRange( + const MachineInstr &MI, const MachineBasicBlock &DestBB) const { + int64_t BrOffset = getInstrOffset(MI); + int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset; + + if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset)) + return true; + + DEBUG( + dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber() + << " from BB#" << MI.getParent()->getNumber() + << " to " << DestOffset + << " offset " << DestOffset - BrOffset + << '\t' << MI + ); + + return false; +} + +/// fixupConditionalBranch - Fix up a conditional branch whose destination is +/// too far away to fit in its displacement field. It is converted to an inverse +/// conditional branch + an unconditional branch to the destination. +bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + + bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond); + assert(!Fail && "branches to be relaxed must be analyzable"); + (void)Fail; + + // Add an unconditional branch to the destination and invert the branch + // condition to jump over it: + // tbz L1 + // => + // tbnz L2 + // b L1 + // L2: + + if (FBB && isBlockInRange(MI, *FBB)) { + // Last MI in the BB is an unconditional branch. We can simply invert the + // condition and swap destinations: + // beq L1 + // b L2 + // => + // bne L2 + // b L1 + DEBUG(dbgs() << " Invert condition and swap " + "its destination with " << MBB->back()); + + TII->reverseBranchCondition(Cond); + int OldSize = 0, NewSize = 0; + TII->removeBranch(*MBB, &OldSize); + TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize); + + BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize); + return true; + } else if (FBB) { + // We need to split the basic block here to obtain two long-range + // unconditional branches. + auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + MF->insert(++MBB->getIterator(), &NewBB); + + // Insert an entry into BlockInfo to align it properly with the block + // numbers. + BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo()); + + unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size; + int NewBrSize; + TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize); + NewBBSize += NewBrSize; + + // Update the successor lists according to the transformation to follow. + // Do it here since if there's no split, no update is needed. 
+ MBB->replaceSuccessor(FBB, &NewBB); + NewBB.addSuccessor(FBB); + } + + // We now have an appropriate fall-through block in place (either naturally or + // just created), so we can invert the condition. + MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber() + << ", invert condition and change dest. to BB#" + << NextBB.getNumber() << '\n'); + + unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; + + // Insert a new conditional branch and a new unconditional branch. + int RemovedSize = 0; + TII->reverseBranchCondition(Cond); + TII->removeBranch(*MBB, &RemovedSize); + MBBSize -= RemovedSize; + + int AddedSize = 0; + TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize); + MBBSize += AddedSize; + + // Finally, keep the block offsets up to date. + adjustBlockOffsets(*MBB); + return true; +} + +bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { + MachineBasicBlock *MBB = MI.getParent(); + + unsigned OldBrSize = TII->getInstSizeInBytes(MI); + MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); + + int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset; + int64_t SrcOffset = getInstrOffset(MI); + + assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset)); + + BlockInfo[MBB->getNumber()].Size -= OldBrSize; + + MachineBasicBlock *BranchBB = MBB; + + // If this was an expanded conditional branch, there is already a single + // unconditional branch in a block. + if (!MBB->empty()) { + BranchBB = createNewBlockAfter(*MBB); + + // Add live outs. + for (const MachineBasicBlock *Succ : MBB->successors()) { + for (const MachineBasicBlock::RegisterMaskPair &LiveIn : Succ->liveins()) + BranchBB->addLiveIn(LiveIn); + } + + BranchBB->sortUniqueLiveIns(); + BranchBB->addSuccessor(DestBB); + MBB->replaceSuccessor(DestBB, BranchBB); + } + + DebugLoc DL = MI.getDebugLoc(); + MI.eraseFromParent(); + BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch( + *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get()); + + adjustBlockOffsets(*MBB); + return true; +} + +bool BranchRelaxation::relaxBranchInstructions() { + bool Changed = false; + + // Relaxing branches involves creating new basic blocks, so re-eval + // end() for termination. + for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) { + MachineBasicBlock &MBB = *I; + + // Empty block? + MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr(); + if (Last == MBB.end()) + continue; + + // Expand the unconditional branch first if necessary. If there is a + // conditional branch, this will end up changing the branch destination of + // it to be over the newly inserted indirect branch block, which may avoid + // the need to try expanding the conditional branch first, saving an extra + // jump. + if (Last->isUnconditionalBranch()) { + // Unconditional branch destination might be unanalyzable, assume these + // are OK. + if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) { + if (!isBlockInRange(*Last, *DestBB)) { + fixupUnconditionalBranch(*Last); + ++NumUnconditionalRelaxed; + Changed = true; + } + } + } + + // Loop over the conditional branches. 
+ MachineBasicBlock::iterator Next; + for (MachineBasicBlock::iterator J = MBB.getFirstTerminator(); + J != MBB.end(); J = Next) { + Next = std::next(J); + MachineInstr &MI = *J; + + if (MI.isConditionalBranch()) { + MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); + if (!isBlockInRange(MI, *DestBB)) { + if (Next != MBB.end() && Next->isConditionalBranch()) { + // If there are multiple conditional branches, this isn't an + // analyzable block. Split later terminators into a new block so + // each one will be analyzable. + + splitBlockBeforeInstr(*Next, DestBB); + } else { + fixupConditionalBranch(MI); + ++NumConditionalRelaxed; + } + + Changed = true; + + // This may have modified all of the terminators, so start over. + Next = MBB.getFirstTerminator(); + } + } + } + } + + return Changed; +} + +bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + + DEBUG(dbgs() << "***** BranchRelaxation *****\n"); + + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + + TRI = ST.getRegisterInfo(); + if (TRI->trackLivenessAfterRegAlloc(*MF)) + RS.reset(new RegScavenger()); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + MF->RenumberBlocks(); + + // Do the initial scan of the function, building up information about the + // sizes of each block. + scanFunction(); + + DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs();); + + bool MadeChange = false; + while (relaxBranchInstructions()) + MadeChange = true; + + // After a while, this might be made debug-only, but it is not expensive. + verify(); + + DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); + + BlockInfo.clear(); + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 7d67bcf..2e33f14 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -23,6 +23,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> + using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, @@ -64,6 +66,22 @@ void CCState::MarkAllocated(unsigned Reg) { UsedRegs[*AI/32] |= 1 << (*AI&31); } +bool CCState::IsShadowAllocatedReg(unsigned Reg) const { + if (!isAllocated(Reg)) + return false; + + for (auto const &ValAssign : Locs) { + if (ValAssign.isRegLoc()) { + for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true); + AI.isValid(); ++AI) { + if (*AI == Reg) + return false; + } + } + } + return true; +} + /// Analyze an array of argument values, /// incorporating info about the formals into this state. 
void diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 6679819..4cf9b13 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,9 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); + initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); + initializeCountingFunctionInserterPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); @@ -53,6 +55,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachinePipelinerPass(Registry); initializeMachinePostDominatorTreePass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); @@ -68,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); + initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeShrinkWrapPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index ede4041..934b470 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -17,12 +17,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -118,6 +122,19 @@ static cl::opt<bool> DisablePreheaderProtect( "disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders")); +static cl::opt<bool> ProfileGuidedSectionPrefix( + "profile-guided-section-prefix", cl::Hidden, cl::init(true), + cl::desc("Use profile info to add section prefix for hot/cold functions")); + +static cl::opt<unsigned> FreqRatioToSkipMerge( + "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), + cl::desc("Skip merging empty blocks if (frequency of empty block) / " + "(frequency of destination block) is greater than this ratio")); + +static cl::opt<bool> ForceSplitStore( + "force-split-store", cl::Hidden, cl::init(false), + cl::desc("Force store splitting no matter what the target query says.")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -130,6 +147,8 @@ class TypePromotionTransaction; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + std::unique_ptr<BlockFrequencyInfo> BFI; + std::unique_ptr<BranchProbabilityInfo> BPI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. 
Transforms that can invalidate this should update it. @@ -163,10 +182,11 @@ class TypePromotionTransaction; } bool runOnFunction(Function &F) override; - const char *getPassName() const override { return "CodeGen Prepare"; } + StringRef getPassName() const override { return "CodeGen Prepare"; } void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); @@ -175,8 +195,11 @@ class TypePromotionTransaction; private: bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); + BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, + bool isPreheader); bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); bool optimizeInst(Instruction *I, bool& ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, @@ -199,13 +222,15 @@ class TypePromotionTransaction; unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); - void stripInvariantGroupMetadata(Instruction &I); }; } char CodeGenPrepare::ID = 0; -INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false) +INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); @@ -221,6 +246,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); + BFI.reset(); + BPI.reset(); ModifiedDT = false; if (TM) @@ -230,6 +257,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); + if (ProfileGuidedSectionPrefix) { + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + if (PSI->isFunctionEntryHot(&F)) + F.setSectionPrefix(".hot"); + else if (PSI->isFunctionEntryCold(&F)) + F.setSectionPrefix(".cold"); + } + /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. if (!OptSize && TLI && TLI->isSlowDivBypassed()) { @@ -364,6 +400,38 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { return Changed; } +/// Find a destination block from BB if BB is mergeable empty block. +BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + return nullptr; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. 
+ BasicBlock::iterator BBI = BI->getIterator(); + if (BBI != BB->begin()) { + --BBI; + while (isa<DbgInfoIntrinsic>(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) + return nullptr; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + return nullptr; + + if (!canMergeBlocks(BB, DestBB)) + DestBB = nullptr; + + return DestBB; +} + /// Eliminate blocks that contain only PHI nodes, debug info directives, and an /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split /// edges in ways that are non-optimal for isel. Start by eliminating these @@ -382,46 +450,106 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = &*I++; + BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); + if (!DestBB || + !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) + continue; + + eliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, + BasicBlock *DestBB, + bool isPreheader) { + // Do not delete loop preheaders if doing so would create a critical edge. + // Loop preheaders can be good locations to spill registers. If the + // preheader is deleted and we create a critical edge, registers may be + // spilled in the loop body instead. + if (!DisablePreheaderProtect && isPreheader && + !(BB->getSinglePredecessor() && + BB->getSinglePredecessor()->getSingleSuccessor())) + return false; + + // Try to skip merging if the unique predecessor of BB is terminated by a + // switch or indirect branch instruction, and BB is used as an incoming block + // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to + // add COPY instructions in the predecessor of BB instead of BB (if it is not + // merged). Note that the critical edge created by merging such blocks wont be + // split in MachineSink because the jump table is not analyzable. By keeping + // such empty block (BB), ISel will place COPY instructions in BB, not in the + // predecessor of BB. + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred || + !(isa<SwitchInst>(Pred->getTerminator()) || + isa<IndirectBrInst>(Pred->getTerminator()))) + return true; + + if (BB->getTerminator() != BB->getFirstNonPHI()) + return true; + + // We use a simple cost heuristic which determine skipping merging is + // profitable if the cost of skipping merging is less than the cost of + // merging : Cost(skipping merging) < Cost(merging BB), where the + // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and + // the Cost(merging BB) is Freq(Pred) * Cost(Copy). + // Assuming Cost(Copy) == Cost(Branch), we could simplify it to : + // Freq(Pred) / Freq(BB) > 2. + // Note that if there are multiple empty blocks sharing the same incoming + // value for the PHIs in the DestBB, we consider them together. In such + // case, Cost(merging BB) will be the sum of their frequencies. + + if (!isa<PHINode>(DestBB->begin())) + return true; - // If this block doesn't end with an uncond branch, ignore it. 
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - if (!BI || !BI->isUnconditional()) + SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs; + + // Find all other incoming blocks from which incoming values of all PHIs in + // DestBB are the same as the ones from BB. + for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E; + ++PI) { + BasicBlock *DestBBPred = *PI; + if (DestBBPred == BB) continue; - // If the instruction before the branch (skipping debug info) isn't a phi - // node, then other stuff is happening here. - BasicBlock::iterator BBI = BI->getIterator(); - if (BBI != BB->begin()) { - --BBI; - while (isa<DbgInfoIntrinsic>(BBI)) { - if (BBI == BB->begin()) - break; - --BBI; + bool HasAllSameValue = true; + BasicBlock::const_iterator DestBBI = DestBB->begin(); + while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) { + if (DestPN->getIncomingValueForBlock(BB) != + DestPN->getIncomingValueForBlock(DestBBPred)) { + HasAllSameValue = false; + break; } - if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) - continue; } + if (HasAllSameValue) + SameIncomingValueBBs.insert(DestBBPred); + } - // Do not break infinite loops. - BasicBlock *DestBB = BI->getSuccessor(0); - if (DestBB == BB) - continue; + // See if all BB's incoming values are same as the value from Pred. In this + // case, no reason to skip merging because COPYs are expected to be place in + // Pred already. + if (SameIncomingValueBBs.count(Pred)) + return true; - if (!canMergeBlocks(BB, DestBB)) - continue; + if (!BFI) { + Function &F = *BB->getParent(); + LoopInfo LI{DominatorTree(F)}; + BPI.reset(new BranchProbabilityInfo(F, LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); + } - // Do not delete loop preheaders if doing so would create a critical edge. - // Loop preheaders can be good locations to spill registers. If the - // preheader is deleted and we create a critical edge, registers may be - // spilled in the loop body instead. - if (!DisablePreheaderProtect && Preheaders.count(BB) && - !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor())) - continue; + BlockFrequency PredFreq = BFI->getBlockFreq(Pred); + BlockFrequency BBFreq = BFI->getBlockFreq(BB); - eliminateMostlyEmptyBlock(BB); - MadeChange = true; - } - return MadeChange; + for (auto SameValueBB : SameIncomingValueBBs) + if (SameValueBB->getUniquePredecessor() == Pred && + DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) + BBFreq += BFI->getBlockFreq(SameValueBB); + + return PredFreq.getFrequency() <= + BBFreq.getFrequency() * FreqRatioToSkipMerge; } /// Return true if we can merge BB into DestBB if there is a single @@ -805,6 +933,14 @@ static bool SinkCast(CastInst *CI) { /// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL) { + // Sink only "cheap" (or nop) address-space casts. This is a weaker condition + // than sinking only nop casts, but is helpful on some platforms. + if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { + if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(), + ASC->getDestAddressSpace())) + return false; + } + // If this is a noop copy, EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(DL, CI->getType()); @@ -925,6 +1061,8 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { InsertedCmp = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); + // Propagate the debug info. 
+ InsertedCmp->setDebugLoc(CI->getDebugLoc()); } // Replace a use of the cmp with a use of the new cmp. @@ -1814,18 +1952,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { default: break; case Intrinsic::objectsize: { // Lower all uses of llvm.objectsize.* - uint64_t Size; - Type *ReturnTy = CI->getType(); - Constant *RetVal = nullptr; - ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1)); - ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min; - if (getObjectSize(II->getArgOperand(0), - Size, *DL, TLInfo, false, Mode)) { - RetVal = ConstantInt::get(ReturnTy, Size); - } else { - RetVal = ConstantInt::get(ReturnTy, - Mode == ObjSizeMode::Min ? 0 : -1ULL); - } + ConstantInt *RetVal = + lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. @@ -1963,13 +2091,13 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (!TLI) return false; - ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); - if (!RI) + ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RetI) return false; PHINode *PN = nullptr; BitCastInst *BCI = nullptr; - Value *V = RI->getReturnValue(); + Value *V = RetI->getReturnValue(); if (V) { BCI = dyn_cast<BitCastInst>(V); if (BCI) @@ -1983,14 +2111,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (PN && PN->getParent() != BB) return false; - // It's not safe to eliminate the sign / zero extension of the return value. - // See llvm::isInTailCallPosition(). - const Function *F = BB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - return false; - // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { @@ -1999,24 +2119,26 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (&*BI == BCI) // Also skip over the bitcast. ++BI; - if (&*BI != RI) + if (&*BI != RetI) return false; } else { BasicBlock::iterator BI = BB->begin(); while (isa<DbgInfoIntrinsic>(BI)) ++BI; - if (&*BI != RI) + if (&*BI != RetI) return false; } /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. + const Function *F = BB->getParent(); SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && - TLI->mayBeEmittedAsTailCall(CI)) + TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) TailCalls.push_back(CI); } } else { @@ -2033,7 +2155,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { continue; CallInst *CI = dyn_cast<CallInst>(&*RI); - if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) TailCalls.push_back(CI); } } @@ -2060,7 +2183,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { continue; // Duplicate the return into CallBB. 
- (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); ModifiedDT = Changed = true; ++NumRetsDup; } @@ -3237,7 +3360,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, int64_t ConstantOffset = 0; gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); @@ -3665,8 +3788,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, TPT.rollback(LastKnownGood); // If the match didn't cover I, then it won't be shared by it. - if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), - I) == MatchedAddrModeInsts.end()) + if (!is_contained(MatchedAddrModeInsts, I)) return false; MatchedAddrModeInsts.clear(); @@ -3791,18 +3913,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } TPT.commit(); - // Check to see if any of the instructions supersumed by this addr mode are - // non-local to I's BB. - bool AnyNonLocal = false; - for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { - if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { - AnyNonLocal = true; - break; - } - } - // If all the instructions matched are already in this BB, don't do anything. - if (!AnyNonLocal) { + if (none_of(AddrModeInsts, [&](Value *V) { + return IsNonLocalValue(V, MemoryInst->getParent()); + })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -4217,6 +4331,10 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, /// promotions apply. /// bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { + // ExtLoad formation infrastructure requires TLI to be effective. + if (!TLI) + return false; + // Try to promote a chain of computation if it allows to form // an extended load. TypePromotionTransaction TPT; @@ -4246,7 +4364,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { // If the load has other users and the truncate is not free, this probably // isn't worthwhile. - if (!LI->hasOneUse() && TLI && + if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && !TLI->isTruncateFree(I->getType(), LI->getType())) { I = OldExt; @@ -4262,7 +4380,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { assert(isa<SExtInst>(I) && "Unexpected ext type!"); LType = ISD::SEXTLOAD; } - if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) { + if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) { I = OldExt; TPT.rollback(LastKnownGood); return false; @@ -4273,6 +4391,14 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { TPT.commit(); I->removeFromParent(); I->insertAfter(LI); + // CGP does not check if the zext would be speculatively executed when moved + // to the same basic block as the load. Preserving its original location would + // pessimize the debugging experience, as well as negatively impact the + // quality of sample pgo. We don't want to use "line 0" as that has a + // size cost in the line-table section and logically the zext can be seen as + // part of the load. Therefore we conservatively reuse the same debug location + // for the load and the zext. 
+ I->setDebugLoc(LI->getDebugLoc()); ++NumExtsMoved; return true; } @@ -4583,10 +4709,45 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, return false; } +/// If \p isTrue is true, return the true value of \p SI, otherwise return +/// false value of \p SI. If the true/false value of \p SI is defined by any +/// select instructions in \p Selects, look through the defining select +/// instruction until the true/false value is not defined in \p Selects. +static Value *getTrueOrFalseValue( + SelectInst *SI, bool isTrue, + const SmallPtrSet<const Instruction *, 2> &Selects) { + Value *V; + + for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); + DefSI = dyn_cast<SelectInst>(V)) { + assert(DefSI->getCondition() == SI->getCondition() && + "The condition of DefSI does not match with SI"); + V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + } + return V; +} /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { + // Find all consecutive select instructions that share the same condition. + SmallVector<SelectInst *, 2> ASI; + ASI.push_back(SI); + for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); + It != SI->getParent()->end(); ++It) { + SelectInst *I = dyn_cast<SelectInst>(&*It); + if (I && SI->getCondition() == I->getCondition()) { + ASI.push_back(I); + } else { + break; + } + } + + SelectInst *LastSI = ASI.back(); + // Increment the current iterator to skip all the rest of select instructions + // because they will be either "not lowered" or "all lowered" to branch. + CurInstIterator = std::next(LastSI->getIterator()); + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? @@ -4633,7 +4794,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); - BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); // Delete the unconditional branch that was just created by the split. @@ -4643,22 +4804,30 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // At least one will become an actual new basic block. BasicBlock *TrueBlock = nullptr; BasicBlock *FalseBlock = nullptr; + BranchInst *TrueBranch = nullptr; + BranchInst *FalseBranch = nullptr; // Sink expensive instructions into the conditional blocks to avoid executing // them speculatively. 
- if (sinkSelectOperand(TTI, SI->getTrueValue())) { - TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", - EndBlock->getParent(), EndBlock); - auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); - auto *TrueInst = cast<Instruction>(SI->getTrueValue()); - TrueInst->moveBefore(TrueBranch); - } - if (sinkSelectOperand(TTI, SI->getFalseValue())) { - FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", - EndBlock->getParent(), EndBlock); - auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - auto *FalseInst = cast<Instruction>(SI->getFalseValue()); - FalseInst->moveBefore(FalseBranch); + for (SelectInst *SI : ASI) { + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + if (TrueBlock == nullptr) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + } + auto *TrueInst = cast<Instruction>(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + if (FalseBlock == nullptr) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + } + auto *FalseInst = cast<Instruction>(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } } // If there was nothing to sink, then arbitrarily choose the 'false' side @@ -4677,28 +4846,42 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // of the condition, it means that side of the branch goes to the end block // directly and the path originates from the start block from the point of // view of the new PHI. + BasicBlock *TT, *FT; if (TrueBlock == nullptr) { - BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TT = EndBlock; + FT = FalseBlock; TrueBlock = StartBlock; } else if (FalseBlock == nullptr) { - BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + TT = TrueBlock; + FT = EndBlock; FalseBlock = StartBlock; } else { - BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + TT = TrueBlock; + FT = FalseBlock; + } + IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI); + + SmallPtrSet<const Instruction *, 2> INS; + INS.insert(ASI.begin(), ASI.end()); + // Use reverse iterator because later select may use the value of the + // earlier select, and we need to propagate value through earlier select + // to get the PHI operand. + for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { + SelectInst *SI = *It; + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); + PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + INS.erase(SI); + ++NumSelectsExpanded; } - - // The select itself is replaced with a PHI Node. - PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); - PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), TrueBlock); - PN->addIncoming(SI->getFalseValue(), FalseBlock); - - SI->replaceAllUsesWith(PN); - SI->eraseFromParent(); // Instruct OptimizeBlock to skip to the next block. 
CurInstIterator = StartBlock->end(); - ++NumSelectsExpanded; return true; } @@ -5179,6 +5362,117 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { return false; } +/// For the instruction sequence of store below, F and I values +/// are bundled together as an i64 value before being stored into memory. +/// Sometimes it is more efficent to generate separate stores for F and I, +/// which can remove the bitwise instructions or sink them to colder places. +/// +/// (store (or (zext (bitcast F to i32) to i64), +/// (shl (zext I to i64), 32)), addr) --> +/// (store F, addr) and (store I, addr+4) +/// +/// Similarly, splitting for other merged store can also be beneficial, like: +/// For pair of {i32, i32}, i64 store --> two i32 stores. +/// For pair of {i32, i16}, i64 store --> two i32 stores. +/// For pair of {i16, i16}, i32 store --> two i16 stores. +/// For pair of {i16, i8}, i32 store --> two i16 stores. +/// For pair of {i8, i8}, i16 store --> two i8 stores. +/// +/// We allow each target to determine specifically which kind of splitting is +/// supported. +/// +/// The store patterns are commonly seen from the simple code snippet below +/// if only std::make_pair(...) is sroa transformed before inlined into hoo. +/// void goo(const std::pair<int, float> &); +/// hoo() { +/// ... +/// goo(std::make_pair(tmp, ftmp)); +/// ... +/// } +/// +/// Although we already have similar splitting in DAG Combine, we duplicate +/// it in CodeGenPrepare to catch the case in which pattern is across +/// multiple BBs. The logic in DAG Combine is kept to catch case generated +/// during code expansion. +static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, + const TargetLowering &TLI) { + // Handle simple but common cases only. + Type *StoreType = SI.getValueOperand()->getType(); + if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) || + DL.getTypeSizeInBits(StoreType) == 0) + return false; + + unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; + Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); + if (DL.getTypeStoreSizeInBits(SplitStoreType) != + DL.getTypeSizeInBits(SplitStoreType)) + return false; + + // Match the following patterns: + // (store (or (zext LValue to i64), + // (shl (zext HValue to i64), 32)), HalfValBitSize) + // or + // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) + // (zext LValue to i64), + // Expect both operands of OR and the first operand of SHL have only + // one use. + Value *LValue, *HValue; + if (!match(SI.getValueOperand(), + m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), + m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), + m_SpecificInt(HalfValBitSize)))))) + return false; + + // Check LValue and HValue are int with size less or equal than 32. + if (!LValue->getType()->isIntegerTy() || + DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || + !HValue->getType()->isIntegerTy() || + DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) + return false; + + // If LValue/HValue is a bitcast instruction, use the EVT before bitcast + // as the input of target query. + auto *LBC = dyn_cast<BitCastInst>(LValue); + auto *HBC = dyn_cast<BitCastInst>(HValue); + EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) + : EVT::getEVT(LValue->getType()); + EVT HighTy = HBC ? 
EVT::getEVT(HBC->getOperand(0)->getType()) + : EVT::getEVT(HValue->getType()); + if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return false; + + // Start to split store. + IRBuilder<> Builder(SI.getContext()); + Builder.SetInsertPoint(&SI); + + // If LValue/HValue is a bitcast in another BB, create a new one in current + // BB so it may be merged with the splitted stores by dag combiner. + if (LBC && LBC->getParent() != SI.getParent()) + LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); + if (HBC && HBC->getParent() != SI.getParent()) + HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); + + auto CreateSplitStore = [&](Value *V, bool Upper) { + V = Builder.CreateZExtOrBitCast(V, SplitStoreType); + Value *Addr = Builder.CreateBitCast( + SI.getOperand(1), + SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); + if (Upper) + Addr = Builder.CreateGEP( + SplitStoreType, Addr, + ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); + Builder.CreateAlignedStore( + V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment()); + }; + + CreateSplitStore(LValue, false); + CreateSplitStore(HValue, true); + + // Delete the old store. + SI.eraseFromParent(); + return true; +} + bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. @@ -5232,7 +5526,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - stripInvariantGroupMetadata(*LI); + LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); if (TLI) { bool Modified = optimizeLoadExt(LI); unsigned AS = LI->getPointerAddressSpace(); @@ -5243,7 +5537,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - stripInvariantGroupMetadata(*SI); + if (TLI && splitMergedValStore(*SI, *DL, *TLI)) + return true; + SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); return optimizeMemoryInst(I, SI->getOperand(1), @@ -5542,7 +5838,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // incoming edge to the PHI nodes, because both branch instructions target // now the same successor. Depending on the original branch condition // (and/or) we have to swap the successors (TrueDest, FalseDest), so that - // we perfrom the correct update for the PHI nodes. + // we perform the correct update for the PHI nodes. // This doesn't change the successor order of the just created branch // instruction (or any other instruction). if (Opc == Instruction::Or) @@ -5649,8 +5945,3 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } - -void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { - if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group)) - I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID()); -} diff --git a/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp new file mode 100644 index 0000000..1e46a7a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp @@ -0,0 +1,62 @@ +//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Insert calls to counter functions, such as mcount, intended to be called +// once per function, at the beginning of each function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct CountingFunctionInserter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CountingFunctionInserter() : FunctionPass(ID) { + initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } + + bool runOnFunction(Function &F) override { + std::string CountingFunctionName = + F.getFnAttribute("counting-function").getValueAsString(); + if (CountingFunctionName.empty()) + return false; + + Type *VoidTy = Type::getVoidTy(F.getContext()); + Constant *CountingFn = + F.getParent()->getOrInsertFunction(CountingFunctionName, + VoidTy, nullptr); + CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt()); + return true; + } + }; + + char CountingFunctionInserter::ID = 0; +} + +INITIALIZE_PASS(CountingFunctionInserter, "cfinserter", + "Inserts calls to mcount-like functions", false, false) + +//===----------------------------------------------------------------------===// +// +// CountingFunctionInserter - Give any unnamed non-void instructions "tmp" names. +// +FunctionPass *llvm::createCountingFunctionInserterPass() { + return new CountingFunctionInserter(); +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index a0189a1..5d60c30 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -69,8 +69,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. 
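The effect of the new CountingFunctionInserter pass, sketched at the source level (mcount is only the conventional example; the actual callee is whatever the function's "counting-function" attribute names): a call to the named void function is inserted once at the entry of each attributed function.

    extern "C" void mcount();   // whatever function the attribute names

    int compute(int x) { return x * x; }

    // The pass lowers compute as if it had been written like this:
    int computeInstrumented(int x) {
      mcount();                 // inserted at the first insertion point
      return x * x;
    }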
- const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(MF); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + BitVector Pristine = MFI.getPristineRegs(MF); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 2386af9..7b1b2d6 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -31,9 +31,14 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden, + cl::init(0), cl::desc("If present, stops packetizing after N instructions")); +static unsigned InstrCount = 0; + // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp @@ -218,6 +223,13 @@ VLIWPacketizerList::~VLIWPacketizerList() { // End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { + DEBUG({ + if (!CurrentPacketMIs.empty()) { + dbgs() << "Finalizing packet:\n"; + for (MachineInstr *MI : CurrentPacketMIs) + dbgs() << " * " << *MI; + } + }); if (CurrentPacketMIs.size() > 1) { MachineInstr &MIFirst = *CurrentPacketMIs.front(); finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator()); @@ -249,8 +261,17 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (SUnit &SU : VLIWScheduler->SUnits) MIToSUnit[SU.getInstr()] = &SU; + bool LimitPresent = InstrLimit.getPosition(); + // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { + if (LimitPresent) { + if (InstrCount >= InstrLimit) { + EndItr = BeginItr; + break; + } + InstrCount++; + } MachineInstr &MI = *BeginItr; initPacketizerState(); diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 0b8dc7a..17c229a 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -122,7 +122,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // liveness as we go. for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), MIE = MBB.rend(); MII != MIE; ) { - MachineInstr *MI = &*MII; + MachineInstr *MI = &*MII++; // If the instruction is dead, delete it! if (isDead(MI)) { @@ -133,9 +133,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; - MIE = MBB.rend(); - // MII is now pointing to the next instruction to process, - // so don't increment it. continue; } @@ -169,10 +166,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { } } } - - // We didn't delete the current instruction, so increment MII to - // the next one. 
- ++MII; } } diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp index 1d9e79c..a7ba694 100644 --- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -63,7 +63,7 @@ public: static char ID; DetectDeadLanes() : MachineFunctionPass(ID) {} - const char *getPassName() const override { return "Detect Dead Lanes"; } + StringRef getPassName() const override { return "Detect Dead Lanes"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -210,7 +210,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, VRegInfo &MORegInfo = VRegInfos[MORegIdx]; LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; // Any change at all? - if ((UsedLanes & ~PrevUsedLanes) == 0) + if ((UsedLanes & ~PrevUsedLanes).none()) return; // Set UsedLanes and remember instruction for further propagation. @@ -303,7 +303,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, VRegInfo &RegInfo = VRegInfos[DefRegIdx]; LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes; // Any change at all? - if ((DefinedLanes & ~PrevDefinedLanes) == 0) + if ((DefinedLanes & ~PrevDefinedLanes).none()) return; RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes; @@ -356,7 +356,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { // Live-In or unused registers have no definition but are considered fully // defined. if (!MRI->hasOneDef(Reg)) - return ~0u; + return LaneBitmask::getAll(); const MachineOperand &Def = *MRI->def_begin(Reg); const MachineInstr &DefMI = *Def.getParent(); @@ -368,7 +368,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { PutInWorklist(RegIdx); if (Def.isDead()) - return 0; + return LaneBitmask::getNone(); // COPY/PHI can copy across unrelated register classes (example: float/int) // with incompatible subregister structure. Do not include these in the @@ -376,7 +376,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); // Determine initially DefinedLanes. 
- LaneBitmask DefinedLanes = 0; + LaneBitmask DefinedLanes; for (const MachineOperand &MO : DefMI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; @@ -386,9 +386,9 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { LaneBitmask MODefinedLanes; if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { - MODefinedLanes = ~0u; + MODefinedLanes = LaneBitmask::getAll(); } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { - MODefinedLanes = ~0u; + MODefinedLanes = LaneBitmask::getAll(); } else { assert(TargetRegisterInfo::isVirtualRegister(MOReg)); if (MRI->hasOneDef(MOReg)) { @@ -410,7 +410,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { return DefinedLanes; } if (DefMI.isImplicitDef() || Def.isDead()) - return 0; + return LaneBitmask::getNone(); assert(Def.getSubReg() == 0 && "Should not have subregister defs in machine SSA phase"); @@ -418,7 +418,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { } LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { - LaneBitmask UsedLanes = 0; + LaneBitmask UsedLanes = LaneBitmask::getNone(); for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { if (!MO.readsReg()) continue; @@ -462,7 +462,7 @@ bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO, const VRegInfo &RegInfo) const { unsigned SubReg = MO.getSubReg(); LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); - return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0; + return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none(); } bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, @@ -482,7 +482,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx]; LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO); - if (UsedLanes != 0) + if (UsedLanes.any()) return false; unsigned MOReg = MO.getReg(); @@ -546,7 +546,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { continue; unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; - if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) { + if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); MO.setIsDead(); } @@ -577,12 +577,12 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { // register coalescer cannot deal with hidden dead defs. However without // subregister liveness enabled, the expected benefits of this pass are small // so we safe the compile time. 
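The DetectDeadLanes hunks above replace raw unsigned lane masks with the LaneBitmask type and its none()/any()/getAll()/getNone() queries; a simplified stand-in for that wrapper (not LLVM's actual class, just the shape the new code relies on) looks like this:

    #include <cstdint>

    struct LaneMask {                       // illustrative stand-in only
      std::uint32_t Bits = 0;
      static LaneMask getAll()  { return {~std::uint32_t(0)}; }
      static LaneMask getNone() { return {}; }
      bool none() const { return Bits == 0; }
      bool any()  const { return Bits != 0; }
      LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
      LaneMask operator~() const { return {~Bits}; }
    };

    // The idiom the patch switches to: "does UsedLanes add any new lane?"
    bool addsNewLanes(LaneMask Used, LaneMask Prev) {
      return !(Used & ~Prev).none();
    }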
- if (!MF.getSubtarget().enableSubRegLiveness()) { + MRI = &MF.getRegInfo(); + if (!MRI->subRegLivenessEnabled()) { DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); return false; } - MRI = &MF.getRegInfo(); TRI = MRI->getTargetRegisterInfo(); unsigned NumVirtRegs = MRI->getNumVirtRegs(); diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index eae78a9..38af19a 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -71,7 +71,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override; - const char *getPassName() const override { + StringRef getPassName() const override { return "Exception handling preparation"; } }; diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 8c96124..7291727 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -547,7 +547,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // Fix up Head's terminators. // It should become a single branch or a fallthrough. DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); - TII->RemoveBranch(*Head); + TII->removeBranch(*Head); // Erase the now empty conditional blocks. It is likely that Head can fall // through to Tail, and we can join the two blocks. @@ -574,7 +574,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // We need a branch to Tail, let code placement work it out later. DEBUG(dbgs() << "Converting to unconditional branch.\n"); SmallVector<MachineOperand, 0> EmptyCond; - TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); + TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); Head->addSuccessor(Tail); } DEBUG(dbgs() << *Head); @@ -602,7 +602,7 @@ public: EarlyIfConverter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { return "Early If-Conversion"; } + StringRef getPassName() const override { return "Early If-Conversion"; } private: bool tryConvertIf(MachineBasicBlock*); diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp index aea7c31..b3a25544 100644 --- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,8 +58,8 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { Blocks.resize(getNumBundles()); for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { - unsigned b0 = getBundle(i, 0); - unsigned b1 = getBundle(i, 1); + unsigned b0 = getBundle(i, false); + unsigned b1 = getBundle(i, true); Blocks[b0].push_back(i); if (b1 != b0) Blocks[b1].push_back(i); @@ -69,6 +70,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { /// Specialize WriteGraph, the standard implementation won't work. namespace llvm { + template<> raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, bool ShortNames, @@ -89,7 +91,8 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } -} + +} // end namespace llvm /// view - Visualize the annotated bipartite CFG with Graphviz. 
void EdgeBundles::view() const { diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 566b8d5..32c57e3 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -137,6 +138,7 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; std::vector<SmallVector<int, 1>> AliasMap; const unsigned NumRegs; LiveReg *LiveRegs; @@ -170,12 +172,10 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { - return "Execution dependency fix"; - } + StringRef getPassName() const override { return "Execution dependency fix"; } private: iterator_range<SmallVectorImpl<int>::const_iterator> @@ -203,6 +203,8 @@ private: void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); + void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref); bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); void processUndefReads(MachineBasicBlock*); }; @@ -473,6 +475,60 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { processDefs(MI, !DomP.first); } +/// \brief Helps avoid false dependencies on undef registers by updating the +/// machine instructions' undef operand to use a register that the instruction +/// is truly dependent on, or use a register with clearance higher than Pref. +void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + MachineOperand &MO = MI->getOperand(OpIdx); + assert(MO.isUndef() && "Expected undef machine operand"); + + unsigned OriginalReg = MO.getReg(); + + // Update only undef operands that are mapped to one register. + if (AliasMap[OriginalReg].size() != 1) + return; + + // Get the undef operand's register class + const TargetRegisterClass *OpRC = + TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); + + // If the instruction has a true dependency, we can hide the false depdency + // behind it. + for (MachineOperand &CurrMO : MI->operands()) { + if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || + !OpRC->contains(CurrMO.getReg())) + continue; + // We found a true dependency - replace the undef register with the true + // dependency. + MO.setReg(CurrMO.getReg()); + return; + } + + // Go over all registers in the register class and find the register with + // max clearance or clearance higher than Pref. 
+ unsigned MaxClearance = 0; + unsigned MaxClearanceReg = OriginalReg; + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); + for (auto Reg : Order) { + assert(AliasMap[Reg].size() == 1 && + "Reg is expected to be mapped to a single index"); + int RCrx = *regIndices(Reg).begin(); + unsigned Clearance = CurInstr - LiveRegs[RCrx].Def; + if (Clearance <= MaxClearance) + continue; + MaxClearance = Clearance; + MaxClearanceReg = Reg; + + if (MaxClearance > Pref) + break; + } + + // Update the operand if we found a register with better clearance. + if (MaxClearanceReg != OriginalReg) + MO.setReg(MaxClearanceReg); +} + /// \brief Return true to if it makes sense to break dependence on a partial def /// or undef use. bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, @@ -510,6 +566,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { unsigned OpNum; unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); if (Pref) { + pickBestRegisterForUndef(MI, OpNum, Pref); if (shouldBreakDependence(MI, OpNum, Pref)) UndefReads.push_back(std::make_pair(MI, OpNum)); } @@ -520,8 +577,6 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - if (MO.isImplicit()) - break; if (MO.isUse()) continue; for (int rx : regIndices(MO.getReg())) { @@ -557,7 +612,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { return; // Collect this block's live out register units. - LiveRegSet.init(TRI); + LiveRegSet.init(*TRI); // We do not need to care about pristine registers as they are just preserved // but not actually used in the function. LiveRegSet.addLiveOutsNoPristines(*MBB); @@ -652,9 +707,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector<LiveReg, 4> Regs; - for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { - int rx = *i; + SmallVector<const LiveReg *, 4> Regs; + for (int rx : used) { assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -663,16 +717,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; } // Sorted insertion. - bool Inserted = false; - for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); - i != e && !Inserted; ++i) { - if (LR.Def < i->Def) { - Inserted = true; - Regs.insert(i, LR); - } - } - if (!Inserted) - Regs.push_back(LR); + auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, + [](const LiveReg *LHS, const LiveReg *RHS) { + return LHS->Def < RHS->Def; + }); + Regs.insert(I, &LR); } // doms are now sorted in order of appearance. Try to merge them all, giving @@ -680,14 +729,14 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { - dv = Regs.pop_back_val().Value; + dv = Regs.pop_back_val()->Value; // Force the first dv to match the current instruction. dv->AvailableDomains = dv->getCommonDomains(available); assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *Latest = Regs.pop_back_val().Value; + DomainValue *Latest = Regs.pop_back_val()->Value; // Skip already merged values. 
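The visitSoftInstr change above swaps a hand-rolled sorted insertion for std::upper_bound over LiveReg pointers; the same pattern in isolation (Item is an illustrative element type, not the pass's LiveReg):

    #include <algorithm>
    #include <vector>

    struct Item { int Def; };   // stand-in for the entries ordered by Def

    // Insert P while keeping Regs sorted by ascending Def, as the new code does.
    void sortedInsert(std::vector<const Item *> &Regs, const Item *P) {
      auto I = std::upper_bound(Regs.begin(), Regs.end(), P,
                                [](const Item *LHS, const Item *RHS) {
                                  return LHS->Def < RHS->Def;
                                });
      Regs.insert(I, P);
    }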
if (Latest == dv || Latest->Next) continue; @@ -731,6 +780,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); + RegClassInfo.runOnMachineFunction(mf); LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp index b16f81c..d61afad 100644 --- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -30,7 +30,7 @@ public: bool runOnMachineFunction(MachineFunction &F) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; } diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index c8116a4..be21c73 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -32,7 +32,7 @@ class Printer : public FunctionPass { public: explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - const char *getPassName() const override; + StringRef getPassName() const override; void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; @@ -87,7 +87,7 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } -const char *Printer::getPassName() const { +StringRef Printer::getPassName() const { return "Print Garbage Collector Information"; } diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp index bb8cfa1..d183c7f 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp @@ -14,6 +14,8 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" using namespace llvm; +LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry) + GCMetadataPrinter::GCMetadataPrinter() {} GCMetadataPrinter::~GCMetadataPrinter() {} diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp index 326adab..3524654 100644 --- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -45,7 +45,7 @@ public: static char ID; LowerIntrinsics(); - const char *getPassName() const override; + StringRef getPassName() const override; void getAnalysisUsage(AnalysisUsage &AU) const override; bool doInitialization(Module &M) override; @@ -93,7 +93,7 @@ LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) { initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); } -const char *LowerIntrinsics::getPassName() const { +StringRef LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; } @@ -316,7 +316,7 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); RI != FI->roots_end();) { // If the root references a dead object, no need to keep it. - if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) { RI = FI->removeStackRoot(RI); } else { unsigned FrameReg; // FIXME: surely GCRoot ought to store the @@ -338,11 +338,11 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Find the size of the stack frame. There may be no correct static frame // size, we use UINT64_MAX to represent this. 
- const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + const bool DynamicFrameSize = MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF); - FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize()); // Find all safe points. if (FI->getStrategy().needsSafePoints()) diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 554d326..31ab86f 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -16,6 +16,8 @@ using namespace llvm; +LLVM_INSTANTIATE_REGISTRY(GCRegistry) + GCStrategy::GCStrategy() : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), InitRoots(true), diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp new file mode 100644 index 0000000..1321221 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -0,0 +1,170 @@ +//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements some simple delegations needed for call lowering. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +bool CallLowering::lowerCall( + MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg, + ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const { + auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout(); + + // First step is to marshall all the function's parameters into the correct + // physregs and memory locations. Gather the sequence of argument types that + // we'll pass to the assigner function. 
+ SmallVector<ArgInfo, 8> OrigArgs; + unsigned i = 0; + for (auto &Arg : CI.arg_operands()) { + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}}; + setArgFlags(OrigArg, i + 1, DL, CI); + OrigArgs.push_back(OrigArg); + ++i; + } + + MachineOperand Callee = MachineOperand::CreateImm(0); + if (Function *F = CI.getCalledFunction()) + Callee = MachineOperand::CreateGA(F, 0); + else + Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + + ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}}; + if (!OrigRet.Ty->isVoidTy()) + setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI); + + return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs); +} + +template <typename FuncInfoTy> +void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const FuncInfoTy &FuncInfo) const { + const AttributeSet &Attrs = FuncInfo.getAttributes(); + if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) + Arg.Flags.setZExt(); + if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) + Arg.Flags.setSExt(); + if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) + Arg.Flags.setInReg(); + if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) + Arg.Flags.setSRet(); + if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) + Arg.Flags.setSwiftSelf(); + if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) + Arg.Flags.setSwiftError(); + if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) + Arg.Flags.setByVal(); + if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) + Arg.Flags.setInAlloca(); + + if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); + Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; + if (FuncInfo.getParamAlignment(OpIdx)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx); + else + FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); + Arg.Flags.setByValAlign(FrameAlign); + } + if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) + Arg.Flags.setNest(); + Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); +} + +template void +CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const Function &FuncInfo) const; + +template void +CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const CallInst &FuncInfo) const; + +bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, + CCAssignFn *AssignFn, + ArrayRef<ArgInfo> Args, + ValueHandler &Handler) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = *MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + + unsigned NumArgs = Args.size(); + for (unsigned i = 0; i != NumArgs; ++i) { + MVT CurVT = MVT::getVT(Args[i].Ty); + if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo)) + return false; + } + + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + if (VA.isRegLoc()) + Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA); + else if (VA.isMemLoc()) { + unsigned Size = VA.getValVT() == MVT::iPTR + ? 
DL.getPointerSize() + : alignTo(VA.getValVT().getSizeInBits(), 8) / 8; + unsigned Offset = VA.getLocMemOffset(); + MachinePointerInfo MPO; + unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO); + Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA); + } else { + // FIXME: Support byvals and other weirdness + return false; + } + } + return true; +} + +unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, + CCValAssign &VA) { + LLT LocTy{VA.getLocVT()}; + switch (VA.getLocInfo()) { + default: break; + case CCValAssign::Full: + case CCValAssign::BCvt: + // FIXME: bitconverting between vector types may or may not be a + // nop in big-endian situations. + return ValReg; + case CCValAssign::AExt: + assert(!VA.getLocVT().isVector() && "unexpected vector extend"); + // Otherwise, it's a nop. + return ValReg; + case CCValAssign::SExt: { + unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + MIRBuilder.buildSExt(NewReg, ValReg); + return NewReg; + } + case CCValAssign::ZExt: { + unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + MIRBuilder.buildZExt(NewReg, ValReg); + return NewReg; + } + } + llvm_unreachable("unable to extend register"); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index 231e5ac..fcd2722 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -25,6 +25,8 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); + initializeLegalizerPass(Registry); initializeRegBankSelectPass(Registry); + initializeInstructionSelectPass(Registry); } #endif // LLVM_BUILD_GLOBAL_ISEL diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b8a960c..89a042f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -14,12 +14,19 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #define DEBUG_TYPE "irtranslator" @@ -27,13 +34,29 @@ using namespace llvm; char IRTranslator::ID = 0; -INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI", - false, false); +INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", + false, false) + +static void reportTranslationError(const Value &V, const Twine &Message) { + std::string ErrStorage; + raw_string_ostream Err(ErrStorage); + Err << Message << ": " << V << '\n'; + report_fatal_error(Err.str()); +} IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) { initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); } +void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) 
const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + + unsigned IRTranslator::getOrCreateVReg(const Value &Val) { unsigned &ValReg = ValToVReg[&Val]; // Check if this is the first time we see Val. @@ -42,56 +65,132 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) { // we need to concat together to produce the value. assert(Val.getType()->isSized() && "Don't know how to create an empty vreg"); - assert(!Val.getType()->isAggregateType() && "Not yet implemented"); - unsigned Size = Val.getType()->getPrimitiveSizeInBits(); - unsigned VReg = MRI->createGenericVirtualRegister(Size); + unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL}); ValReg = VReg; - assert(!isa<Constant>(Val) && "Not yet implemented"); + + if (auto CV = dyn_cast<Constant>(&Val)) { + bool Success = translate(*CV, VReg); + if (!Success) { + if (!TPC->isGlobalISelAbortEnabled()) { + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return VReg; + } + reportTranslationError(Val, "unable to translate constant"); + } + } } return ValReg; } +int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { + if (FrameIndices.find(&AI) != FrameIndices.end()) + return FrameIndices[&AI]; + + unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType()); + unsigned Size = + ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); + + // Always allocate at least one byte. + Size = std::max(Size, 1u); + + unsigned Alignment = AI.getAlignment(); + if (!Alignment) + Alignment = DL->getABITypeAlignment(AI.getAllocatedType()); + + int &FI = FrameIndices[&AI]; + FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI); + return FI; +} + +unsigned IRTranslator::getMemOpAlignment(const Instruction &I) { + unsigned Alignment = 0; + Type *ValTy = nullptr; + if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) { + Alignment = SI->getAlignment(); + ValTy = SI->getValueOperand()->getType(); + } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Alignment = LI->getAlignment(); + ValTy = LI->getType(); + } else if (!TPC->isGlobalISelAbortEnabled()) { + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return 1; + } else + llvm_unreachable("unhandled memory instruction"); + + return Alignment ? Alignment : DL->getABITypeAlignment(ValTy); +} + MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) { MachineBasicBlock *&MBB = BBToMBB[&BB]; if (!MBB) { - MachineFunction &MF = MIRBuilder.getMF(); - MBB = MF.CreateMachineBasicBlock(); - MF.push_back(MBB); + MBB = MF->CreateMachineBasicBlock(&BB); + MF->push_back(MBB); + + if (BB.hasAddressTaken()) + MBB->setHasAddressTaken(); } return *MBB; } -bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) { +bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: handle signed/unsigned wrapping flags. + // Get or create a virtual register for each value. // Unless the value is a Constant => loadimm cst? // or inline constant each time? // Creation of a virtual register needs to have a size. 
- unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0)); - unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1)); - unsigned Res = getOrCreateVReg(Inst); - MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1); + unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1); return true; } -bool IRTranslator::translateReturn(const Instruction &Inst) { - assert(isa<ReturnInst>(Inst) && "Return expected"); - const Value *Ret = cast<ReturnInst>(Inst).getReturnValue(); +bool IRTranslator::translateCompare(const User &U, + MachineIRBuilder &MIRBuilder) { + const CmpInst *CI = dyn_cast<CmpInst>(&U); + unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); + unsigned Res = getOrCreateVReg(U); + CmpInst::Predicate Pred = + CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>( + cast<ConstantExpr>(U).getPredicate()); + + if (CmpInst::isIntPredicate(Pred)) + MIRBuilder.buildICmp(Pred, Res, Op0, Op1); + else + MIRBuilder.buildFCmp(Pred, Res, Op0, Op1); + + return true; +} + +bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { + const ReturnInst &RI = cast<ReturnInst>(U); + const Value *Ret = RI.getReturnValue(); // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret)); } -bool IRTranslator::translateBr(const Instruction &Inst) { - assert(isa<BranchInst>(Inst) && "Branch expected"); - const BranchInst &BrInst = *cast<BranchInst>(&Inst); - if (BrInst.isUnconditional()) { - const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0)); - MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt); - MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB); - } else { - assert(0 && "Not yet implemented"); +bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { + const BranchInst &BrInst = cast<BranchInst>(U); + unsigned Succ = 0; + if (!BrInst.isUnconditional()) { + // We want a G_BRCOND to the true BB followed by an unconditional branch. + unsigned Tst = getOrCreateVReg(*BrInst.getCondition()); + const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++)); + MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt); + MIRBuilder.buildBrCond(Tst, TrueBB); } + + const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ)); + MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt); + MIRBuilder.buildBr(TgtBB); + // Link successors. MachineBasicBlock &CurBB = MIRBuilder.getMBB(); for (const BasicBlock *Succ : BrInst.successors()) @@ -99,66 +198,694 @@ bool IRTranslator::translateBr(const Instruction &Inst) { return true; } +bool IRTranslator::translateSwitch(const User &U, + MachineIRBuilder &MIRBuilder) { + // For now, just translate as a chain of conditional branches. + // FIXME: could we share most of the logic/code in + // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel? + // At first sight, it seems most of the logic in there is independent of + // SelectionDAG-specifics and a lot of work went in to optimize switch + // lowering in there. 
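At the source level, the chain-of-branches lowering described in translateSwitch's comment corresponds to rewriting a switch as successive equality tests, one compare and conditional branch per case (purely illustrative code, not from the patch):

    int dispatch(int v) {            // original switch
      switch (v) {
      case 1: return 10;
      case 2: return 20;
      default: return -1;
      }
    }

    int dispatchLowered(int v) {     // shape produced by the chained lowering
      if (v == 1) return 10;         // one G_ICMP eq + G_BRCOND per case
      if (v == 2) return 20;
      return -1;                     // default successor
    }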
+ + const SwitchInst &SwInst = cast<SwitchInst>(U); + const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition()); + + LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL); + for (auto &CaseIt : SwInst.cases()) { + const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue()); + const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue); + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor()); + + MIRBuilder.buildBrCond(Tst, TrueBB); + CurBB.addSuccessor(&TrueBB); + + MachineBasicBlock *FalseBB = + MF->CreateMachineBasicBlock(SwInst.getParent()); + MF->push_back(FalseBB); + MIRBuilder.buildBr(*FalseBB); + CurBB.addSuccessor(FalseBB); + + MIRBuilder.setMBB(*FalseBB); + } + // handle default case + MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest()); + MIRBuilder.buildBr(DefaultBB); + MIRBuilder.getMBB().addSuccessor(&DefaultBB); + + return true; +} + +bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { + const LoadInst &LI = cast<LoadInst>(U); + + if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic()) + return false; + + assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment"); + auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad; + + unsigned Res = getOrCreateVReg(LI); + unsigned Addr = getOrCreateVReg(*LI.getPointerOperand()); + LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL}; + MIRBuilder.buildLoad( + Res, Addr, + *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), + Flags, DL->getTypeStoreSize(LI.getType()), + getMemOpAlignment(LI))); + return true; +} + +bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { + const StoreInst &SI = cast<StoreInst>(U); + + if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic()) + return false; + + assert(!SI.isAtomic() && "only non-atomic stores supported at the moment"); + auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOStore; + + unsigned Val = getOrCreateVReg(*SI.getValueOperand()); + unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); + LLT VTy{*SI.getValueOperand()->getType(), *DL}, + PTy{*SI.getPointerOperand()->getType(), *DL}; + + MIRBuilder.buildStore( + Val, Addr, + *MF->getMachineMemOperand( + MachinePointerInfo(SI.getPointerOperand()), Flags, + DL->getTypeStoreSize(SI.getValueOperand()->getType()), + getMemOpAlignment(SI))); + return true; +} + +bool IRTranslator::translateExtractValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + Type *Int32Ty = Type::getInt32Ty(U.getContext()); + SmallVector<Value *, 1> Indices; + + // getIndexedOffsetInType is designed for GEPs, so the first index is the + // usual array element rather than looking into the actual aggregate. 
+ Indices.push_back(ConstantInt::get(Int32Ty, 0)); + + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) { + for (auto Idx : EVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else { + for (unsigned i = 1; i < U.getNumOperands(); ++i) + Indices.push_back(U.getOperand(i)); + } + + uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); + + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src)); + + return true; +} + +bool IRTranslator::translateInsertValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + Type *Int32Ty = Type::getInt32Ty(U.getContext()); + SmallVector<Value *, 1> Indices; + + // getIndexedOffsetInType is designed for GEPs, so the first index is the + // usual array element rather than looking into the actual aggregate. + Indices.push_back(ConstantInt::get(Int32Ty, 0)); + + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { + for (auto Idx : IVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else { + for (unsigned i = 2; i < U.getNumOperands(); ++i) + Indices.push_back(U.getOperand(i)); + } + + uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); + + unsigned Res = getOrCreateVReg(U); + const Value &Inserted = *U.getOperand(1); + MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted), + Offset); + + return true; +} + +bool IRTranslator::translateSelect(const User &U, + MachineIRBuilder &MIRBuilder) { + MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), + getOrCreateVReg(*U.getOperand(1)), + getOrCreateVReg(*U.getOperand(2))); + return true; +} + +bool IRTranslator::translateBitCast(const User &U, + MachineIRBuilder &MIRBuilder) { + if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) { + unsigned &Reg = ValToVReg[&U]; + if (Reg) + MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0))); + else + Reg = getOrCreateVReg(*U.getOperand(0)); + return true; + } + return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); +} + +bool IRTranslator::translateCast(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { + unsigned Op = getOrCreateVReg(*U.getOperand(0)); + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op); + return true; +} + +bool IRTranslator::translateGetElementPtr(const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: support vector GEPs. + if (U.getType()->isVectorTy()) + return false; + + Value &Op0 = *U.getOperand(0); + unsigned BaseReg = getOrCreateVReg(Op0); + LLT PtrTy{*Op0.getType(), *DL}; + unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace()); + LLT OffsetTy = LLT::scalar(PtrSize); + + int64_t Offset = 0; + for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (StructType *StTy = GTI.getStructTypeOrNull()) { + unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); + Offset += DL->getStructLayout(StTy)->getElementOffset(Field); + continue; + } else { + uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. 
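translateGetElementPtr folds every constant index into a running byte offset and emits multiply/add operations only for dynamic indices; the arithmetic corresponds to a hand-written address computation like the one below (the struct and its offsets are invented for the example):

    #include <cstddef>
    #include <cstdint>

    struct Pair { std::int32_t A; float B; };   // example element type

    // Address of base[i].B, computed as the translator would: the constant
    // field offset is folded into one literal, the dynamic index is scaled
    // by the element size.
    char *fieldAddress(char *Base, std::int64_t I) {
      std::int64_t Offset =
          I * static_cast<std::int64_t>(sizeof(Pair))     // dynamic index * alloc size
          + static_cast<std::int64_t>(offsetof(Pair, B)); // constant struct offset
      return Base + Offset;
    }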
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + Offset += ElementSize * CI->getSExtValue(); + continue; + } + + if (Offset != 0) { + unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildConstant(OffsetReg, Offset); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + + BaseReg = NewBaseReg; + Offset = 0; + } + + // N = N + Idx * ElementSize; + unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildConstant(ElementSizeReg, ElementSize); + + unsigned IdxReg = getOrCreateVReg(*Idx); + if (MRI->getType(IdxReg) != OffsetTy) { + unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); + IdxReg = NewIdxReg; + } + + unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg); + + unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + BaseReg = NewBaseReg; + } + } + + if (Offset != 0) { + unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildConstant(OffsetReg, Offset); + MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg); + return true; + } + + MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg); + return true; +} + +bool IRTranslator::translateMemcpy(const CallInst &CI, + MachineIRBuilder &MIRBuilder) { + LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL}; + if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() != + 0 || + cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() != + 0 || + SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) + return false; + + SmallVector<CallLowering::ArgInfo, 8> Args; + for (int i = 0; i < 3; ++i) { + const auto &Arg = CI.getArgOperand(i); + Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + } + + MachineOperand Callee = MachineOperand::CreateES("memcpy"); + + return CLI->lowerCall(MIRBuilder, Callee, + CallLowering::ArgInfo(0, CI.getType()), Args); +} + +void IRTranslator::getStackGuard(unsigned DstReg, + MachineIRBuilder &MIRBuilder) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD); + MIB.addDef(DstReg); + + auto &TLI = *MF->getSubtarget().getTargetLowering(); + Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent()); + if (!Global) + return; + + MachinePointerInfo MPInfo(Global); + MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1); + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; + *MemRefs = + MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, + DL->getPointerABIAlignment()); + MIB.setMemRefs(MemRefs, MemRefs + 1); +} + +bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, + MachineIRBuilder &MIRBuilder) { + LLT Ty{*CI.getOperand(0)->getType(), *DL}; + LLT s1 = LLT::scalar(1); + unsigned Width = Ty.getSizeInBits(); + unsigned Res = MRI->createGenericVirtualRegister(Ty); + unsigned Overflow = MRI->createGenericVirtualRegister(s1); + auto MIB = MIRBuilder.buildInstr(Op) + .addDef(Res) + .addDef(Overflow) + .addUse(getOrCreateVReg(*CI.getOperand(0))) + .addUse(getOrCreateVReg(*CI.getOperand(1))); + + if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) { + unsigned Zero = MRI->createGenericVirtualRegister(s1); + EntryBuilder.buildConstant(Zero, 0); + MIB.addUse(Zero); + } + + 
MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width); + return true; +} + +bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, + MachineIRBuilder &MIRBuilder) { + switch (ID) { + default: + break; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + // FIXME: these obviously need to be supported properly. + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return true; + case Intrinsic::uadd_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder); + case Intrinsic::sadd_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder); + case Intrinsic::usub_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder); + case Intrinsic::ssub_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder); + case Intrinsic::umul_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder); + case Intrinsic::smul_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder); + case Intrinsic::memcpy: + return translateMemcpy(CI, MIRBuilder); + case Intrinsic::eh_typeid_for: { + GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); + unsigned Reg = getOrCreateVReg(CI); + unsigned TypeID = MF->getTypeIDFor(GV); + MIRBuilder.buildConstant(Reg, TypeID); + return true; + } + case Intrinsic::objectsize: { + // If we don't know by now, we're never going to know. + const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1)); + + MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); + return true; + } + case Intrinsic::stackguard: + getStackGuard(getOrCreateVReg(CI), MIRBuilder); + return true; + case Intrinsic::stackprotector: { + LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL}; + unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy); + getStackGuard(GuardVal, MIRBuilder); + + AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1)); + MIRBuilder.buildStore( + GuardVal, getOrCreateVReg(*Slot), + *MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, + getOrCreateFrameIndex(*Slot)), + MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, + PtrTy.getSizeInBits() / 8, 8)); + return true; + } + } + return false; +} + +bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { + const CallInst &CI = cast<CallInst>(U); + auto TII = MF->getTarget().getIntrinsicInfo(); + const Function *F = CI.getCalledFunction(); + + if (!F || !F->isIntrinsic()) { + unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); + SmallVector<unsigned, 8> Args; + for (auto &Arg: CI.arg_operands()) + Args.push_back(getOrCreateVReg(*Arg)); + + return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() { + return getOrCreateVReg(*CI.getCalledValue()); + }); + } + + Intrinsic::ID ID = F->getIntrinsicID(); + if (TII && ID == Intrinsic::not_intrinsic) + ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); + + assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); + + if (translateKnownIntrinsic(CI, ID, MIRBuilder)) + return true; + + unsigned Res = CI.getType()->isVoidTy() ? 
0 : getOrCreateVReg(CI); + MachineInstrBuilder MIB = + MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory()); + + for (auto &Arg : CI.arg_operands()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) + MIB.addImm(CI->getSExtValue()); + else + MIB.addUse(getOrCreateVReg(*Arg)); + } + return true; +} + +bool IRTranslator::translateInvoke(const User &U, + MachineIRBuilder &MIRBuilder) { + const InvokeInst &I = cast<InvokeInst>(U); + MCContext &Context = MF->getContext(); + + const BasicBlock *ReturnBB = I.getSuccessor(0); + const BasicBlock *EHPadBB = I.getSuccessor(1); + + const Value *Callee(I.getCalledValue()); + const Function *Fn = dyn_cast<Function>(Callee); + if (isa<InlineAsm>(Callee)) + return false; + + // FIXME: support invoking patchpoint and statepoint intrinsics. + if (Fn && Fn->isIntrinsic()) + return false; + + // FIXME: support whatever these are. + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + return false; + + // FIXME: support Windows exception handling. + if (!isa<LandingPadInst>(EHPadBB->front())) + return false; + + + // Emit the actual call, bracketed by EH_LABELs so that the MF knows about + // the region covered by the try. + MCSymbol *BeginSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); + + unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I); + SmallVector<CallLowering::ArgInfo, 8> Args; + for (auto &Arg: I.arg_operands()) + Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + + if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0), + CallLowering::ArgInfo(Res, I.getType()), Args)) + return false; + + MCSymbol *EndSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); + + // FIXME: track probabilities. + MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB), + &ReturnMBB = getOrCreateBB(*ReturnBB); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); + MIRBuilder.getMBB().addSuccessor(&ReturnMBB); + MIRBuilder.getMBB().addSuccessor(&EHPadMBB); + + return true; +} + +bool IRTranslator::translateLandingPad(const User &U, + MachineIRBuilder &MIRBuilder) { + const LandingPadInst &LP = cast<LandingPadInst>(U); + + MachineBasicBlock &MBB = MIRBuilder.getMBB(); + addLandingPadInfo(LP, MBB); + + MBB.setIsEHPad(); + + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother. + auto &TLI = *MF->getSubtarget().getTargetLowering(); + const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return true; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) + return true; + + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) + .addSym(MF->addLandingPad(&MBB)); + + // Mark exception register as live in. 
+ SmallVector<unsigned, 2> Regs; + SmallVector<uint64_t, 2> Offsets; + LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits()); + if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) { + unsigned VReg = MRI->createGenericVirtualRegister(p0); + MIRBuilder.buildCopy(VReg, Reg); + Regs.push_back(VReg); + Offsets.push_back(0); + } + + if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) { + unsigned VReg = MRI->createGenericVirtualRegister(p0); + MIRBuilder.buildCopy(VReg, Reg); + Regs.push_back(VReg); + Offsets.push_back(p0.getSizeInBits()); + } + + MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets); + return true; +} + +bool IRTranslator::translateStaticAlloca(const AllocaInst &AI, + MachineIRBuilder &MIRBuilder) { + if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca()) + return false; + + assert(AI.isStaticAlloca() && "only handle static allocas now"); + unsigned Res = getOrCreateVReg(AI); + int FI = getOrCreateFrameIndex(AI); + MIRBuilder.buildFrameIndex(Res, FI); + return true; +} + +bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) { + const PHINode &PI = cast<PHINode>(U); + auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI); + MIB.addDef(getOrCreateVReg(PI)); + + PendingPHIs.emplace_back(&PI, MIB.getInstr()); + return true; +} + +void IRTranslator::finishPendingPhis() { + for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) { + const PHINode *PI = Phi.first; + MachineInstrBuilder MIB(*MF, Phi.second); + + // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator + // won't create extra control flow here, otherwise we need to find the + // dominating predecessor here (or perhaps force the weirder IRTranslators + // to provide a simple boundary). 
+ for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) { + assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) && + "I appear to have misunderstood Machine PHIs"); + MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i))); + MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]); + } + } +} + bool IRTranslator::translate(const Instruction &Inst) { - MIRBuilder.setDebugLoc(Inst.getDebugLoc()); + CurBuilder.setDebugLoc(Inst.getDebugLoc()); switch(Inst.getOpcode()) { - case Instruction::Add: - return translateBinaryOp(TargetOpcode::G_ADD, Inst); - case Instruction::Or: - return translateBinaryOp(TargetOpcode::G_OR, Inst); - case Instruction::Br: - return translateBr(Inst); - case Instruction::Ret: - return translateReturn(Inst); - +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder); +#include "llvm/IR/Instruction.def" default: - llvm_unreachable("Opcode not supported"); + if (!TPC->isGlobalISelAbortEnabled()) + return false; + llvm_unreachable("unknown opcode"); } } +bool IRTranslator::translate(const Constant &C, unsigned Reg) { + if (auto CI = dyn_cast<ConstantInt>(&C)) + EntryBuilder.buildConstant(Reg, *CI); + else if (auto CF = dyn_cast<ConstantFP>(&C)) + EntryBuilder.buildFConstant(Reg, *CF); + else if (isa<UndefValue>(C)) + EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg); + else if (isa<ConstantPointerNull>(C)) + EntryBuilder.buildConstant(Reg, 0); + else if (auto GV = dyn_cast<GlobalValue>(&C)) + EntryBuilder.buildGlobalValue(Reg, GV); + else if (auto CE = dyn_cast<ConstantExpr>(&C)) { + switch(CE->getOpcode()) { +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder); +#include "llvm/IR/Instruction.def" + default: + if (!TPC->isGlobalISelAbortEnabled()) + return false; + llvm_unreachable("unknown opcode"); + } + } else if (!TPC->isGlobalISelAbortEnabled()) + return false; + else + llvm_unreachable("unhandled constant kind"); + + return true; +} -void IRTranslator::finalize() { +void IRTranslator::finalizeFunction() { // Release the memory used by the different maps we // needed during the translation. + PendingPHIs.clear(); ValToVReg.clear(); + FrameIndices.clear(); Constants.clear(); } -bool IRTranslator::runOnMachineFunction(MachineFunction &MF) { - const Function &F = *MF.getFunction(); +bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { + MF = &CurMF; + const Function &F = *MF->getFunction(); if (F.empty()) return false; - CLI = MF.getSubtarget().getCallLowering(); - MIRBuilder.setMF(MF); - MRI = &MF.getRegInfo(); - // Setup the arguments. - MachineBasicBlock &MBB = getOrCreateBB(F.front()); - MIRBuilder.setMBB(MBB); + CLI = MF->getSubtarget().getCallLowering(); + CurBuilder.setMF(*MF); + EntryBuilder.setMF(*MF); + MRI = &MF->getRegInfo(); + DL = &F.getParent()->getDataLayout(); + TPC = &getAnalysis<TargetPassConfig>(); + + assert(PendingPHIs.empty() && "stale PHIs"); + + // Setup a separate basic-block for the arguments and constants, falling + // through to the IR-level Function's entry block. + MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock(); + MF->push_back(EntryBB); + EntryBB->addSuccessor(&getOrCreateBB(F.front())); + EntryBuilder.setMBB(*EntryBB); + + // Lower the actual args into this basic block. 
SmallVector<unsigned, 8> VRegArgs; for (const Argument &Arg: F.args()) VRegArgs.push_back(getOrCreateVReg(Arg)); - bool Succeeded = - CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs); - if (!Succeeded) + bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs); + if (!Succeeded) { + if (!TPC->isGlobalISelAbortEnabled()) { + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + finalizeFunction(); + return false; + } report_fatal_error("Unable to lower arguments"); + } + // And translate the function! for (const BasicBlock &BB: F) { MachineBasicBlock &MBB = getOrCreateBB(BB); // Set the insertion point of all the following translations to // the end of this basic block. - MIRBuilder.setMBB(MBB); + CurBuilder.setMBB(MBB); + for (const Instruction &Inst: BB) { - bool Succeeded = translate(Inst); + Succeeded &= translate(Inst); if (!Succeeded) { - DEBUG(dbgs() << "Cannot translate: " << Inst << '\n'); - report_fatal_error("Unable to translate instruction"); + if (TPC->isGlobalISelAbortEnabled()) + reportTranslationError(Inst, "unable to translate instruction"); + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + break; } } } - // Now that the MachineFrameInfo has been configured, no further changes to - // the reserved registers are possible. - MRI->freezeReservedRegs(MF); + if (Succeeded) { + finishPendingPhis(); + + // Now that the MachineFrameInfo has been configured, no further changes to + // the reserved registers are possible. + MRI->freezeReservedRegs(*MF); + + // Merge the argument lowering and constants block with its single + // successor, the LLVM-IR entry block. We want the basic block to + // be maximal. + assert(EntryBB->succ_size() == 1 && + "Custom BB used for lowering should have only one successor"); + // Get the successor of the current entry block. + MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin(); + assert(NewEntryBB.pred_size() == 1 && + "LLVM-IR entry block has a predecessor!?"); + // Move all the instruction from the current entry block to the + // new entry block. + NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(), + EntryBB->end()); + + // Update the live-in information for the new entry block. + for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins()) + NewEntryBB.addLiveIn(LiveIn); + NewEntryBB.sortUniqueLiveIns(); + + // Get rid of the now empty basic block. + EntryBB->removeSuccessor(&NewEntryBB); + MF->remove(EntryBB); + + assert(&MF->front() == &NewEntryBB && + "New entry wasn't next in the list of basic block!"); + } + + finalizeFunction(); return false; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp new file mode 100644 index 0000000..1d205cd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -0,0 +1,175 @@ +//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the InstructionSelect class. 
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "instruction-select" + +using namespace llvm; + +char InstructionSelect::ID = 0; +INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, + "Select target instructions out of generic instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, + "Select target instructions out of generic instructions", + false, false) + +InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { + initializeInstructionSelectPass(*PassRegistry::getPassRegistry()); +} + +void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static void reportSelectionError(const MachineInstr *MI, const Twine &Message) { + const MachineFunction &MF = *MI->getParent()->getParent(); + std::string ErrStorage; + raw_string_ostream Err(ErrStorage); + Err << Message << ":\nIn function: " << MF.getName() << '\n'; + if (MI) + Err << *MI << '\n'; + report_fatal_error(Err.str()); +} + +bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + assert(ISel && "Cannot work without InstructionSelector"); + + // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many + // other MF/MFI fields we need to initialize. + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + +#ifndef NDEBUG + // Check that our input is fully legal: we require the function to have the + // Legalized property, so it should be. + // FIXME: This should be in the MachineVerifier, but it can't use the + // LegalizerInfo as it's currently in the separate GlobalISel library. + // The RegBankSelected property is already checked in the verifier. Note + // that it has the same layering problem, but we only use inline methods so + // end up not needing to link against the GlobalISel library. + if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) + reportSelectionError(&MI, "Instruction is not legal"); + +#endif + // FIXME: We could introduce new blocks and will need to fix the outer loop. + // Until then, keep track of the number of blocks to assert that we don't. + const size_t NumBlocks = MF.size(); + + bool Failed = false; + for (MachineBasicBlock *MBB : post_order(&MF)) { + if (MBB->empty()) + continue; + + // Select instructions in reverse block order. We permit erasing so have + // to resort to manually iterating and recognizing the begin (rend) case. 
+    bool ReachedBegin = false;
+    for (auto MII = std::prev(MBB->end()), Begin = MBB->begin();
+         !ReachedBegin;) {
+#ifndef NDEBUG
+      // Keep track of the insertion range for debug printing.
+      const auto AfterIt = std::next(MII);
+#endif
+      // Select this instruction.
+      MachineInstr &MI = *MII;
+
+      // And have our iterator point to the next instruction, if there is one.
+      if (MII == Begin)
+        ReachedBegin = true;
+      else
+        --MII;
+
+      DEBUG(dbgs() << "Selecting: \n  " << MI);
+
+      if (!ISel->select(MI)) {
+        if (TPC.isGlobalISelAbortEnabled())
+          // FIXME: It would be nice to dump all inserted instructions. It's
+          // not obvious how, esp. considering select() can insert after MI.
+          reportSelectionError(&MI, "Cannot select");
+        Failed = true;
+        break;
+      }
+
+      // Dump the range of instructions that MI expanded into.
+      DEBUG({
+        auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);
+        dbgs() << "Into:\n";
+        for (auto &InsertedMI : make_range(InsertedBegin, AfterIt))
+          dbgs() << "  " << InsertedMI;
+        dbgs() << '\n';
+      });
+    }
+  }
+
+  // Now that selection is complete, there are no more generic vregs. Verify
+  // that the size of the now-constrained vreg is unchanged and that it has a
+  // register class.
+  for (auto &VRegToType : MRI.getVRegToType()) {
+    unsigned VReg = VRegToType.first;
+    auto *RC = MRI.getRegClassOrNull(VReg);
+    auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
+                   ? nullptr
+                   : &*MRI.def_instr_begin(VReg);
+    if (!RC) {
+      if (TPC.isGlobalISelAbortEnabled())
+        reportSelectionError(MI, "VReg has no regclass after selection");
+      Failed = true;
+      break;
+    }
+
+    if (VRegToType.second.isValid() &&
+        VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
+      if (TPC.isGlobalISelAbortEnabled())
+        reportSelectionError(
+            MI, "VReg has explicit size different from class size");
+      Failed = true;
+      break;
+    }
+  }
+
+  MRI.getVRegToType().clear();
+
+  if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
+    MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+    return false;
+  }
+  assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
+
+  // FIXME: Should we accurately track changes?
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
new file mode 100644
index 0000000..5c34da0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -0,0 +1,60 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelector class.
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "instructionselector" + +using namespace llvm; + +InstructionSelector::InstructionSelector() {} + +bool InstructionSelector::constrainSelectedInstRegOperands( + MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { + MachineOperand &MO = I.getOperand(OpI); + + // There's nothing to be done on non-register operands. + if (!MO.isReg()) + continue; + + DEBUG(dbgs() << "Converting operand: " << MO << '\n'); + assert(MO.isReg() && "Unsupported non-reg operand"); + + unsigned Reg = MO.getReg(); + // Physical registers don't need to be constrained. + if (TRI.isPhysicalRegister(Reg)) + continue; + + // Register operands with a value of 0 (e.g. predicate operands) don't need + // to be constrained. + if (Reg == 0) + continue; + + // If the operand is a vreg, we should constrain its regclass, and only + // insert COPYs if that's impossible. + // constrainOperandRegClass does that for us. + MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), + Reg, OpI)); + } + return true; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp new file mode 100644 index 0000000..e863568 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -0,0 +1,180 @@ +//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the LegalizerHelper class to legalize individual +/// instructions and the LegalizePass wrapper pass for the primary +/// legalization. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "legalizer"
+
+using namespace llvm;
+
+char Legalizer::ID = 0;
+INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
+                      "Legalize a function's Machine IR", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
+                    "Legalize a function's Machine IR", false,
+                    false)
+
+Legalizer::Legalizer() : MachineFunctionPass(ID) {
+  initializeLegalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void Legalizer::init(MachineFunction &MF) {
+}
+
+bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                const TargetInstrInfo &TII) {
+  bool Changed = false;
+  if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
+    return Changed;
+
+  unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
+  unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+  MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
+  if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
+    return Changed;
+
+  unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
+  bool AllDefsReplaced = true;
+
+  // Try to match each register extracted with a corresponding insertion formed
+  // by the G_SEQUENCE.
+  for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
+    MachineOperand &ExtractMO = MI.getOperand(Idx);
+    assert(ExtractMO.isReg() && ExtractMO.isDef() &&
+           "unexpected extract operand");
+
+    unsigned ExtractReg = ExtractMO.getReg();
+    unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
+
+    while (SeqIdx < NumSeqSrcs &&
+           SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
+      ++SeqIdx;
+
+    if (SeqIdx == NumSeqSrcs) {
+      AllDefsReplaced = false;
+      continue;
+    }
+
+    unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
+    if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
+        MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
+      AllDefsReplaced = false;
+      continue;
+    }
+
+    assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
+           "unexpected physical register in G_SEQUENCE");
+
+    // Finally we can replace the uses.
+    for (auto &Use : MRI.use_operands(ExtractReg)) {
+      Changed = true;
+      Use.setReg(OrigReg);
+    }
+  }
+
+  if (AllDefsReplaced) {
+    // If SeqI was the next instruction in the BB and we removed it, we'd break
+    // the outer iteration.
+    assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
+           "G_SEQUENCE does not dominate G_EXTRACT");
+
+    MI.eraseFromParent();
+
+    if (MRI.use_empty(SrcReg))
+      SeqI.eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); + init(MF); + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo(); + LegalizerHelper Helper(MF); + + // FIXME: an instruction may need more than one pass before it is legal. For + // example on most architectures <3 x i3> is doubly-illegal. It would + // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We + // probably want a worklist of instructions rather than naive iterate until + // convergence for performance reasons. + bool Changed = false; + MachineBasicBlock::iterator NextMI; + for (auto &MBB : MF) + for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { + // Get the next Instruction before we try to legalize, because there's a + // good chance MI will be deleted. + NextMI = std::next(MI); + + // Only legalize pre-isel generic instructions: others don't have types + // and are assumed to be legal. + if (!isPreISelGenericOpcode(MI->getOpcode())) + continue; + + auto Res = Helper.legalizeInstr(*MI, LegalizerInfo); + + // Error out if we couldn't legalize this instruction. We may want to fall + // back to DAG ISel instead in the future. + if (Res == LegalizerHelper::UnableToLegalize) { + if (!TPC.isGlobalISelAbortEnabled()) { + MF.getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return false; + } + std::string Msg; + raw_string_ostream OS(Msg); + OS << "unable to legalize instruction: "; + MI->print(OS); + report_fatal_error(OS.str()); + } + + Changed |= Res == LegalizerHelper::Legalized; + } + + + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + for (auto &MBB : MF) { + for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { + // Get the next Instruction before we try to legalize, because there's a + // good chance MI will be deleted. + NextMI = std::next(MI); + + Changed |= combineExtracts(*MI, MRI, TII); + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp new file mode 100644 index 0000000..eb25b6c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -0,0 +1,354 @@ +//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the LegalizerHelper class to legalize +/// individual instructions and the LegalizeMachineIR wrapper pass for the +/// primary legalization. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#include <sstream> + +#define DEBUG_TYPE "legalize-mir" + +using namespace llvm; + +LegalizerHelper::LegalizerHelper(MachineFunction &MF) + : MRI(MF.getRegInfo()) { + MIRBuilder.setMF(MF); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::legalizeInstrStep(MachineInstr &MI, + const LegalizerInfo &LegalizerInfo) { + auto Action = LegalizerInfo.getAction(MI, MRI); + switch (std::get<0>(Action)) { + case LegalizerInfo::Legal: + return AlreadyLegal; + case LegalizerInfo::Libcall: + return libcall(MI); + case LegalizerInfo::NarrowScalar: + return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); + case LegalizerInfo::WidenScalar: + return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); + case LegalizerInfo::Lower: + return lower(MI, std::get<1>(Action), std::get<2>(Action)); + case LegalizerInfo::FewerElements: + return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); + default: + return UnableToLegalize; + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::legalizeInstr(MachineInstr &MI, + const LegalizerInfo &LegalizerInfo) { + SmallVector<MachineInstr *, 4> WorkList; + MIRBuilder.recordInsertions( + [&](MachineInstr *MI) { WorkList.push_back(MI); }); + WorkList.push_back(&MI); + + bool Changed = false; + LegalizeResult Res; + unsigned Idx = 0; + do { + Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo); + if (Res == UnableToLegalize) { + MIRBuilder.stopRecordingInsertions(); + return UnableToLegalize; + } + Changed |= Res == Legalized; + ++Idx; + } while (Idx < WorkList.size()); + + MIRBuilder.stopRecordingInsertions(); + + return Changed ? Legalized : AlreadyLegal; +} + +void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, + SmallVectorImpl<unsigned> &VRegs) { + unsigned Size = Ty.getSizeInBits(); + SmallVector<uint64_t, 4> Indexes; + for (int i = 0; i < NumParts; ++i) { + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + Indexes.push_back(i * Size); + } + MIRBuilder.buildExtract(VRegs, Indexes, Reg); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::libcall(MachineInstr &MI) { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = Ty.getSizeInBits(); + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_FREM: { + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + const char *Name = + TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32); + + CLI.lowerCall( + MIRBuilder, MachineOperand::CreateES(Name), + {MI.getOperand(0).getReg(), Ty}, + {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}}); + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. 
+ if (TypeIdx != 0) + return UnableToLegalize; + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_ADD: { + // Expand in terms of carry-setting/consuming G_ADDE instructions. + unsigned NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / + NarrowTy.getSizeInBits(); + + MIRBuilder.setInstr(MI); + + SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + + unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); + MIRBuilder.buildConstant(CarryIn, 0); + + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + + DstRegs.push_back(DstReg); + Indexes.push_back(i * NarrowSize); + CarryIn = CarryOut; + } + unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildSequence(DstReg, DstRegs, Indexes); + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + case TargetOpcode::G_SUB: { + // Perform operation at larger width (any extension is fine here, high bits + // don't affect the result) and then truncate the result back to the + // original type. + unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy); + unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg()); + MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg()); + + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(DstExt) + .addUse(Src1Ext) + .addUse(Src2Ext); + + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: { + unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV + ? 
TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + + unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse( + MI.getOperand(1).getReg()); + + unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse( + MI.getOperand(2).getReg()); + + unsigned ResExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(ResExt) + .addUse(LHSExt) + .addUse(RHSExt); + + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_LOAD: { + assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) == + WideTy.getSizeInBits() && + "illegal to increase number of bytes loaded"); + + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(), + **MI.memoperands_begin()); + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_STORE: { + assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) == + WideTy.getSizeInBits() && + "illegal to increase number of bytes modified by a store"); + + unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg()); + MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(), + **MI.memoperands_begin()); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_CONSTANT: { + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm()); + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_FCONSTANT: { + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildFConstant(DstExt, *MI.getOperand(1).getFPImm()); + MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_BRCOND: { + unsigned TstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildAnyExt(TstExt, MI.getOperand(0).getReg()); + MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB()); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ICMP: { + assert(TypeIdx == 1 && "unable to legalize predicate"); + bool IsSigned = CmpInst::isSigned( + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate())); + unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy); + unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy); + if (IsSigned) { + MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg()); + MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg()); + } else { + MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg()); + MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg()); + } + MIRBuilder.buildICmp( + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), + MI.getOperand(0).getReg(), Op0Ext, Op1Ext); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_GEP: { + assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); + unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg()); + MI.getOperand(2).setReg(OffsetExt); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + using namespace TargetOpcode; + 
MIRBuilder.setInstr(MI); + + switch(MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) + .addDef(QuotReg) + .addUse(MI.getOperand(1).getReg()) + .addUse(MI.getOperand(2).getReg()); + + unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); + MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), + ProdReg); + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0) + return UnableToLegalize; + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_ADD: { + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned DstReg = MI.getOperand(0).getReg(); + int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize; + + MIRBuilder.setInstr(MI); + + SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]); + DstRegs.push_back(DstReg); + Indexes.push_back(i * NarrowSize); + } + + MIRBuilder.buildSequence(DstReg, DstRegs, Indexes); + MI.eraseFromParent(); + return Legalized; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp new file mode 100644 index 0000000..e496620 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -0,0 +1,182 @@ +//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement an interface to specify and query how an illegal operation on a +// given type should be expanded. +// +// Issues to be resolved: +// + Make it fast. +// + Support weird types like i3, <7 x i3>, ... +// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...) +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/Target/TargetOpcodes.h" +using namespace llvm; + +LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { + // FIXME: these two can be legalized to the fundamental load/store Jakob + // proposed. Once loads & stores are supported. 
+ DefaultActions[TargetOpcode::G_ANYEXT] = Legal; + DefaultActions[TargetOpcode::G_TRUNC] = Legal; + + DefaultActions[TargetOpcode::G_INTRINSIC] = Legal; + DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal; + + DefaultActions[TargetOpcode::G_ADD] = NarrowScalar; + DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar; + DefaultActions[TargetOpcode::G_STORE] = NarrowScalar; + + DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; +} + +void LegalizerInfo::computeTables() { + for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) { + for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) { + for (auto &Action : Actions[Opcode][Idx]) { + LLT Ty = Action.first; + if (!Ty.isVector()) + continue; + + auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp, + Ty.getElementType())]; + Entry = std::max(Entry, Ty.getNumElements()); + } + } + } + + TablesInitialized = true; +} + +// FIXME: inefficient implementation for now. Without ComputeValueVTs we're +// probably going to need specialized lookup structures for various types before +// we have any hope of doing well with something like <13 x i3>. Even the common +// cases should do better than what we have now. +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::getAction(const InstrAspect &Aspect) const { + assert(TablesInitialized && "backend forgot to call computeTables"); + // These *have* to be implemented for now, they're the fundamental basis of + // how everything else is transformed. + + // Nothing is going to go well with types that aren't a power of 2 yet, so + // don't even try because we might make things worse. + if (!isPowerOf2_64(Aspect.Type.getSizeInBits())) + return std::make_pair(Unsupported, LLT()); + + // FIXME: the long-term plan calls for expansion in terms of load/store (if + // they're not legal). + if (Aspect.Opcode == TargetOpcode::G_SEQUENCE || + Aspect.Opcode == TargetOpcode::G_EXTRACT) + return std::make_pair(Legal, Aspect.Type); + + LegalizeAction Action = findInActions(Aspect); + if (Action != NotFound) + return findLegalAction(Aspect, Action); + + unsigned Opcode = Aspect.Opcode; + LLT Ty = Aspect.Type; + if (!Ty.isVector()) { + auto DefaultAction = DefaultActions.find(Aspect.Opcode); + if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal) + return std::make_pair(Legal, Ty); + + if (DefaultAction == DefaultActions.end() || + DefaultAction->second != NarrowScalar) + return std::make_pair(Unsupported, LLT()); + return findLegalAction(Aspect, NarrowScalar); + } + + LLT EltTy = Ty.getElementType(); + int NumElts = Ty.getNumElements(); + + auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy)); + if (ScalarAction != ScalarInVectorActions.end() && + ScalarAction->second != Legal) + return findLegalAction(Aspect, ScalarAction->second); + + // The element type is legal in principle, but the number of elements is + // wrong. + auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy)); + if (MaxLegalElts > NumElts) + return findLegalAction(Aspect, MoreElements); + + if (MaxLegalElts == 0) { + // Scalarize if there's no legal vector type, which is just a special case + // of FewerElements. 
+ return std::make_pair(FewerElements, EltTy); + } + + return findLegalAction(Aspect, FewerElements); +} + +std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> +LegalizerInfo::getAction(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const { + SmallBitVector SeenTypes(8); + const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { + if (!OpInfo[i].isGenericType()) + continue; + + // We don't want to repeatedly check the same operand index, that + // could get expensive. + unsigned TypeIdx = OpInfo[i].getGenericTypeIndex(); + if (SeenTypes[TypeIdx]) + continue; + + SeenTypes.set(TypeIdx); + + LLT Ty = MRI.getType(MI.getOperand(i).getReg()); + auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); + if (Action.first != Legal) + return std::make_tuple(Action.first, TypeIdx, Action.second); + } + return std::make_tuple(Legal, 0, LLT{}); +} + +bool LegalizerInfo::isLegal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const { + return std::get<0>(getAction(MI, MRI)) == Legal; +} + +LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect, + LegalizeAction Action) const { + switch(Action) { + default: + llvm_unreachable("Cannot find legal type"); + case Legal: + case Lower: + case Libcall: + return Aspect.Type; + case NarrowScalar: { + return findLegalType(Aspect, + [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); + } + case WidenScalar: { + return findLegalType(Aspect, [&](LLT Ty) -> LLT { + return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize(); + }); + } + case FewerElements: { + return findLegalType(Aspect, + [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + } + case MoreElements: { + return findLegalType(Aspect, + [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 2f19bcf..c04f6e4 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -23,82 +24,408 @@ using namespace llvm; void MachineIRBuilder::setMF(MachineFunction &MF) { this->MF = &MF; this->MBB = nullptr; + this->MRI = &MF.getRegInfo(); this->TII = MF.getSubtarget().getInstrInfo(); this->DL = DebugLoc(); - this->MI = nullptr; + this->II = MachineBasicBlock::iterator(); + this->InsertedInstr = nullptr; } -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) { +void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { this->MBB = &MBB; - Before = Beginning; + this->II = MBB.end(); assert(&getMF() == MBB.getParent() && "Basic block is in a different function"); } -void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) { +void MachineIRBuilder::setInstr(MachineInstr &MI) { assert(MI.getParent() && "Instruction is not part of a basic block"); setMBB(*MI.getParent()); - this->MI = &MI; - this->Before = Before; + this->II = MI.getIterator(); } -MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() { - if (MI) { - if (Before) - return MI; - if (!MI->getNextNode()) - return getMBB().end(); - return MI->getNextNode(); - } - return Before ? 
getMBB().begin() : getMBB().end(); +void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II) { + assert(MBB.getParent() == &getMF() && + "Basic block is in a different function"); + this->MBB = &MBB; + this->II = II; +} + +void MachineIRBuilder::recordInsertions( + std::function<void(MachineInstr *)> Inserted) { + InsertedInstr = Inserted; +} + +void MachineIRBuilder::stopRecordingInsertions() { + InsertedInstr = nullptr; } //------------------------------------------------------------------------------ // Build instruction variants. //------------------------------------------------------------------------------ -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) { - MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode)); - if (Ty) { - assert(isPreISelGenericOpcode(Opcode) && - "Only generic instruction can have a type"); - NewMI->setType(Ty); - } else - assert(!isPreISelGenericOpcode(Opcode) && - "Generic instruction must have a type"); - getMBB().insert(getInsertPt(), NewMI); - return NewMI; + +MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { + return insertInstr(buildInstrNoInsert(Opcode)); } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res, - unsigned Op0, unsigned Op1) { - return buildInstr(Opcode, nullptr, Res, Op0, Op1); +MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { + MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode)); + return MIB; } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty, - unsigned Res, unsigned Op0, - unsigned Op1) { - MachineInstr *NewMI = buildInstr(Opcode, Ty); - MachineInstrBuilder(getMF(), NewMI) - .addReg(Res, RegState::Define) - .addReg(Op0) - .addReg(Op1); - return NewMI; + +MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { + getMBB().insert(getInsertPt(), MIB); + if (InsertedInstr) + InsertedInstr(MIB); + return MIB; } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res, - unsigned Op0) { - MachineInstr *NewMI = buildInstr(Opcode, nullptr); - MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0); - return NewMI; +MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { + assert(MRI->getType(Res).isPointer() && "invalid operand type"); + return buildInstr(TargetOpcode::G_FRAME_INDEX) + .addDef(Res) + .addFrameIndex(Idx); } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) { - return buildInstr(Opcode, nullptr); +MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, + const GlobalValue *GV) { + assert(MRI->getType(Res).isPointer() && "invalid operand type"); + assert(MRI->getType(Res).getAddressSpace() == + GV->getType()->getAddressSpace() && + "address space mismatch"); + + return buildInstr(TargetOpcode::G_GLOBAL_VALUE) + .addDef(Res) + .addGlobalAddress(GV); +} + +MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && + "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + + return buildInstr(TargetOpcode::G_ADD) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, + unsigned Op1) { + assert(MRI->getType(Res).isPointer() && + MRI->getType(Res) == MRI->getType(Op0) && "type mismatch"); + 
assert(MRI->getType(Op1).isScalar() && "invalid offset type"); + + return buildInstr(TargetOpcode::G_GEP) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && + "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + + return buildInstr(TargetOpcode::G_SUB) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && + "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + + return buildInstr(TargetOpcode::G_MUL) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty, - MachineBasicBlock &BB) { - MachineInstr *NewMI = buildInstr(Opcode, Ty); - MachineInstrBuilder(getMF(), NewMI).addMBB(&BB); - return NewMI; +MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { + return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); +} + +MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) { + return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, + const ConstantInt &Val) { + LLT Ty = MRI->getType(Res); + + assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type"); + + const ConstantInt *NewVal = &Val; + if (Ty.getSizeInBits() != Val.getBitWidth()) + NewVal = ConstantInt::get(MF->getFunction()->getContext(), + Val.getValue().sextOrTrunc(Ty.getSizeInBits())); + + return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); +} + +MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, + int64_t Val) { + auto IntN = IntegerType::get(MF->getFunction()->getContext(), + MRI->getType(Res).getSizeInBits()); + ConstantInt *CI = ConstantInt::get(IntN, Val, true); + return buildConstant(Res, *CI); +} + +MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res, + const ConstantFP &Val) { + assert(MRI->getType(Res).isScalar() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val); +} + +MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst, + MachineBasicBlock &Dest) { + assert(MRI->getType(Tst).isScalar() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); +} + +MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr, + MachineMemOperand &MMO) { + assert(MRI->getType(Res).isValid() && "invalid operand type"); + assert(MRI->getType(Addr).isPointer() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_LOAD) + .addDef(Res) + .addUse(Addr) + .addMemOperand(&MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, + MachineMemOperand &MMO) { + assert(MRI->getType(Val).isValid() && "invalid operand type"); + assert(MRI->getType(Addr).isPointer() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_STORE) + .addUse(Val) + .addUse(Addr) + .addMemOperand(&MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res, + unsigned CarryOut, + unsigned Op0, unsigned Op1, + unsigned CarryIn) { + assert(MRI->getType(Res).isScalar() 
&& "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + assert(MRI->getType(CarryOut).isScalar() && "invalid operand type"); + assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch"); + + return buildInstr(TargetOpcode::G_UADDE) + .addDef(Res) + .addDef(CarryOut) + .addUse(Op0) + .addUse(Op1) + .addUse(CarryIn); +} + +MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, + unsigned Op) { + unsigned Opcode = TargetOpcode::COPY; + if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) + Opcode = TargetOpcode::G_SEXT; + else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) + Opcode = TargetOpcode::G_TRUNC; + + return buildInstr(Opcode).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results, + ArrayRef<uint64_t> Indices, + unsigned Src) { +#ifndef NDEBUG + assert(Results.size() == Indices.size() && "inconsistent number of regs"); + assert(!Results.empty() && "invalid trivial extract"); + assert(std::is_sorted(Indices.begin(), Indices.end()) && + "extract offsets must be in ascending order"); + + assert(MRI->getType(Src).isValid() && "invalid operand type"); + for (auto Res : Results) + assert(MRI->getType(Res).isValid() && "invalid operand type"); +#endif + + auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT)); + for (auto Res : Results) + MIB.addDef(Res); + + MIB.addUse(Src); + + for (auto Idx : Indices) + MIB.addImm(Idx); + + getMBB().insert(getInsertPt(), MIB); + if (InsertedInstr) + InsertedInstr(MIB); + + return MIB; +} + +MachineInstrBuilder +MachineIRBuilder::buildSequence(unsigned Res, + ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { +#ifndef NDEBUG + assert(Ops.size() == Indices.size() && "incompatible args"); + assert(!Ops.empty() && "invalid trivial sequence"); + assert(std::is_sorted(Indices.begin(), Indices.end()) && + "sequence offsets must be in ascending order"); + + assert(MRI->getType(Res).isValid() && "invalid operand type"); + for (auto Op : Ops) + assert(MRI->getType(Op).isValid() && "invalid operand type"); +#endif + + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE); + MIB.addDef(Res); + for (unsigned i = 0; i < Ops.size(); ++i) { + MIB.addUse(Ops[i]); + MIB.addImm(Indices[i]); + } + return MIB; +} + +MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + unsigned Res, + bool HasSideEffects) { + auto MIB = + buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+                                : TargetOpcode::G_INTRINSIC);
+  if (Res)
+    MIB.addDef(Res);
+  MIB.addIntrinsicID(ID);
+  return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) {
+  validateTruncExt(Res, Op, false);
+  return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) {
+  validateTruncExt(Res, Op, false);
+  return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+                                                unsigned Res, unsigned Op0,
+                                                unsigned Op1) {
+#ifndef NDEBUG
+  assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+  assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
+  if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer())
+    assert(MRI->getType(Res).isScalar() && "type mismatch");
+  else
+    assert(MRI->getType(Res).isVector() &&
+           MRI->getType(Res).getNumElements() ==
+               MRI->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_ICMP)
+      .addDef(Res)
+      .addPredicate(Pred)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+                                                unsigned Res, unsigned Op0,
+                                                unsigned Op1) {
+#ifndef NDEBUG
+  assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) &&
+         "invalid operand type");
+  assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+  assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
+  if (MRI->getType(Op0).isScalar())
+    assert(MRI->getType(Res).isScalar() && "type mismatch");
+  else
+    assert(MRI->getType(Res).isVector() &&
+           MRI->getType(Res).getNumElements() ==
+               MRI->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_FCMP)
+      .addDef(Res)
+      .addPredicate(Pred)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
+                                                  unsigned Op0, unsigned Op1) {
+#ifndef NDEBUG
+  LLT ResTy = MRI->getType(Res);
+  assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+         "invalid operand type");
+  assert(ResTy == MRI->getType(Op0) && ResTy == MRI->getType(Op1) &&
+         "type mismatch");
+  if (ResTy.isScalar() || ResTy.isPointer())
+    assert(MRI->getType(Tst).isScalar() && "type mismatch");
+  else
+    assert(MRI->getType(Tst).isVector() &&
+           MRI->getType(Tst).getNumElements() ==
+               MRI->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_SELECT)
+      .addDef(Res)
+      .addUse(Tst)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
+                                        bool IsExtend) {
+#ifndef NDEBUG
+  LLT SrcTy = MRI->getType(Src);
+  LLT DstTy = MRI->getType(Dst);
+
+  if (DstTy.isVector()) {
+    assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
+    assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+           "different number of elements in a trunc/ext");
+  } else
+    assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
+
+  if (IsExtend)
+    assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+           "invalid narrowing extend");
+  else
+    assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+           "invalid widening trunc");
+#endif
 }
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 419e270..cc026ef 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -12,10 +12,12 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" @@ -31,18 +33,18 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode( cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast", "Run the Fast mode (default mapping)"), clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy", - "Use the Greedy mode (best local mapping)"), - clEnumValEnd)); + "Use the Greedy mode (best local mapping)"))); char RegBankSelect::ID = 0; -INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect", +INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false); INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_END(RegBankSelect, "regbankselect", +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, - false); + false) RegBankSelect::RegBankSelect(Mode RunningMode) : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr), @@ -60,6 +62,7 @@ void RegBankSelect::init(MachineFunction &MF) { assert(RBI && "Cannot work without RegisterBankInfo"); MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); + TPC = &getAnalysis<TargetPassConfig>(); if (OptMode != Mode::Fast) { MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); @@ -77,6 +80,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); } + AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -87,7 +91,7 @@ bool RegBankSelect::assignmentMatch( OnlyAssign = false; // Each part of a break down needs to end up in a different register. // In other word, Reg assignement does not match. - if (ValMapping.BreakDown.size() > 1) + if (ValMapping.NumBreakDowns > 1) return false; const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI); @@ -103,11 +107,13 @@ bool RegBankSelect::assignmentMatch( return CurRegBank == DesiredRegBrank; } -void RegBankSelect::repairReg( +bool RegBankSelect::repairReg( MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping, RegBankSelect::RepairingPlacement &RepairPt, const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) { - assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented"); + if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled()) + return false; + assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented"); // An empty range of new register means no repairing. assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair"); @@ -126,7 +132,7 @@ void RegBankSelect::repairReg( "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right places. 
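
A minimal sketch of the repair idiom this hunk switches to (the helper below is hypothetical and not taken from the patch): create a fresh generic vreg on the wanted bank and tie it to the old value with a COPY built through MachineIRBuilder.

    // Hypothetical helper: give SrcReg a twin on DstBank, joined by a COPY.
    static unsigned repairWithCopy(MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI, unsigned SrcReg,
                                   const RegisterBank &DstBank) {
      unsigned DstReg = MRI.createGenericVirtualRegister(MRI.getType(SrcReg));
      MRI.setRegBank(DstReg, DstBank);      // pin the new vreg to the target bank
      MIRBuilder.buildCopy(DstReg, SrcReg); // the cross-bank copy, as below
      return DstReg;
    }
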
- MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src); + MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src); MI->removeFromParent(); DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst) << '\n'); @@ -149,15 +155,16 @@ void RegBankSelect::repairReg( } // TODO: // Legalize NewInstrs if need be. + return true; } uint64_t RegBankSelect::getRepairCost( const MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping) const { assert(MO.isReg() && "We should only repair register operand"); - assert(!ValMapping.BreakDown.empty() && "Nothing to map??"); + assert(ValMapping.NumBreakDowns && "Nothing to map??"); - bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1; + bool IsSameNumOfValues = ValMapping.NumBreakDowns == 1; const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI); // If MO does not have a register bank, we should have just been // able to set one unless we have to break the value down. @@ -195,16 +202,20 @@ uint64_t RegBankSelect::getRepairCost( // TODO: use a dedicated constant for ImpossibleCost. if (Cost != UINT_MAX) return Cost; - assert(false && "Legalization not available yet"); + assert(!TPC->isGlobalISelAbortEnabled() && + "Legalization not available yet"); // Return the legalization cost of that repairing. } - assert(false && "Complex repairing not implemented yet"); - return 1; + assert(!TPC->isGlobalISelAbortEnabled() && + "Complex repairing not implemented yet"); + return UINT_MAX; } RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings, SmallVectorImpl<RepairingPlacement> &RepairPts) { + assert(!PossibleMappings.empty() && + "Do not know how to map this instruction"); RegisterBankInfo::InstructionMapping *BestMapping = nullptr; MappingCost Cost = MappingCost::ImpossibleCost(); @@ -212,6 +223,7 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) { MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost); if (CurCost < Cost) { + DEBUG(dbgs() << "New best: " << CurCost << '\n'); Cost = CurCost; BestMapping = &CurMapping; RepairPts.clear(); @@ -219,7 +231,15 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( RepairPts.emplace_back(std::move(RepairPt)); } } - assert(BestMapping && "No suitable mapping for instruction"); + if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) { + // If none of the mapping worked that means they are all impossible. + // Thus, pick the first one and set an impossible repairing point. + // It will trigger the failed isel mode. + BestMapping = &(*PossibleMappings.begin()); + RepairPts.emplace_back( + RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible)); + } else + assert(BestMapping && "No suitable mapping for instruction"); return *BestMapping; } @@ -250,7 +270,7 @@ void RegBankSelect::tryAvoidingSplit( // For the PHI case, the split may not be actually required. // In the copy case, a phi is already a copy on the incoming edge, // therefore there is no need to split. - if (ValMapping.BreakDown.size() == 1) + if (ValMapping.NumBreakDowns == 1) // This is a already a copy, there is nothing to do. RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign); } @@ -327,7 +347,7 @@ void RegBankSelect::tryAvoidingSplit( // We will split all the edges and repair there. } else { // This is a virtual register defined by a terminator. 
- if (ValMapping.BreakDown.size() == 1) { + if (ValMapping.NumBreakDowns == 1) { // There is nothing to repair, but we may actually lie on // the repairing cost because of the PHIs already proceeded // as already stated. @@ -348,6 +368,9 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( const RegBankSelect::MappingCost *BestCost) { assert((MBFI || !BestCost) && "Costs comparison require MBFI"); + if (!InstrMapping.isValid()) + return MappingCost::ImpossibleCost(); + // If mapped with InstrMapping, MI will have the recorded cost. MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1); bool Saturated = Cost.addLocalCost(InstrMapping.getCost()); @@ -355,32 +378,34 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); DEBUG(dbgs() << "With: " << InstrMapping << '\n'); RepairPts.clear(); - if (BestCost && Cost > *BestCost) + if (BestCost && Cost > *BestCost) { + DEBUG(dbgs() << "Mapping is too expensive from the start\n"); return Cost; + } // Moreover, to realize this mapping, the register bank of each operand must // match this mapping. In other words, we may need to locally reassign the // register banks. Account for that repairing cost as well. // In this context, local means in the surrounding of MI. - for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; - ++OpIdx) { + for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands(); + OpIdx != EndOpIdx; ++OpIdx) { const MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; - DEBUG(dbgs() << "Opd" << OpIdx); + DEBUG(dbgs() << "Opd" << OpIdx << '\n'); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); // If Reg is already properly mapped, this is free. bool Assign; if (assignmentMatch(Reg, ValMapping, Assign)) { - DEBUG(dbgs() << " is free (match).\n"); + DEBUG(dbgs() << "=> is free (match).\n"); continue; } if (Assign) { - DEBUG(dbgs() << " is free (simple assignment).\n"); + DEBUG(dbgs() << "=> is free (simple assignment).\n"); RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Reassign)); continue; @@ -398,8 +423,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( tryAvoidingSplit(RepairPt, MO, ValMapping); // Check that the materialization of the repairing is possible. - if (!RepairPt.canMaterialize()) + if (!RepairPt.canMaterialize()) { + DEBUG(dbgs() << "Mapping involves impossible repairing\n"); return MappingCost::ImpossibleCost(); + } // Account for the split cost and repair cost. // Unless the cost is already saturated or we do not care about the cost. @@ -454,8 +481,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Stop looking into what it takes to repair, this is already // too expensive. - if (BestCost && Cost > *BestCost) + if (BestCost && Cost > *BestCost) { + DEBUG(dbgs() << "Mapping is too expensive, stop processing\n"); return Cost; + } // No need to accumulate more cost information. // We need to still gather the repairing information though. 
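
A worked example (numbers invented) of how the cost assembled in computeMapping is weighed, following the LocalFreq * LocalCost + NonLocalCost accounting that MappingCost::print emits further down:

    // Instruction mapped in a block of frequency 8, mapping cost 1, with one
    // repairing copy (cost 1) staying in the same block and one hoisted to a
    // colder block of frequency 2:
    //   LocalCost    = 1 (instruction) + 1 (local repair)        = 2
    //   NonLocalCost = 2 (frequency elsewhere) * 1 (repair)      = 2
    //   Total        = LocalFreq * LocalCost + NonLocalCost = 8 * 2 + 2 = 18
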
@@ -463,10 +492,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( break; } } + DEBUG(dbgs() << "Total cost is: " << Cost << "\n"); return Cost; } -void RegBankSelect::applyMapping( +bool RegBankSelect::applyMapping( MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping, SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) { // OpdMapper will hold all the information needed for the rewritting. @@ -474,28 +504,27 @@ void RegBankSelect::applyMapping( // First, place the repairing code. for (RepairingPlacement &RepairPt : RepairPts) { - assert(RepairPt.canMaterialize() && - RepairPt.getKind() != RepairingPlacement::Impossible && - "This mapping is impossible"); + if (!RepairPt.canMaterialize() || + RepairPt.getKind() == RepairingPlacement::Impossible) + return false; assert(RepairPt.getKind() != RepairingPlacement::None && "This should not make its way in the list"); unsigned OpIdx = RepairPt.getOpIdx(); MachineOperand &MO = MI.getOperand(OpIdx); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); - unsigned BreakDownSize = ValMapping.BreakDown.size(); - (void)BreakDownSize; unsigned Reg = MO.getReg(); switch (RepairPt.getKind()) { case RepairingPlacement::Reassign: - assert(BreakDownSize == 1 && + assert(ValMapping.NumBreakDowns == 1 && "Reassignment should only be for simple mapping"); MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank); break; case RepairingPlacement::Insert: OpdMapper.createVRegs(OpIdx); - repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)); + if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx))) + return false; break; default: llvm_unreachable("Other kind should not happen"); @@ -504,9 +533,10 @@ void RegBankSelect::applyMapping( // Second, rewrite the instruction. DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); RBI->applyMapping(OpdMapper); + return true; } -void RegBankSelect::assignInstr(MachineInstr &MI) { +bool RegBankSelect::assignInstr(MachineInstr &MI) { DEBUG(dbgs() << "Assign: " << MI); // Remember the repairing placement for all the operands. SmallVector<RepairingPlacement, 4> RepairPts; @@ -516,32 +546,63 @@ void RegBankSelect::assignInstr(MachineInstr &MI) { BestMapping = RBI->getInstrMapping(MI); MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts); (void)DefaultCost; - assert(DefaultCost != MappingCost::ImpossibleCost() && - "Default mapping is not suited"); + if (DefaultCost == MappingCost::ImpossibleCost()) + return false; } else { RegisterBankInfo::InstructionMappings PossibleMappings = RBI->getInstrPossibleMappings(MI); - assert(!PossibleMappings.empty() && - "Do not know how to map this instruction"); + if (PossibleMappings.empty()) + return false; BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts)); } // Make sure the mapping is valid for MI. assert(BestMapping.verify(MI) && "Invalid instruction mapping"); - DEBUG(dbgs() << "Mapping: " << BestMapping << '\n'); + DEBUG(dbgs() << "Best Mapping: " << BestMapping << '\n'); // After this call, MI may not be valid anymore. // Do not use it. - applyMapping(MI, BestMapping, RepairPts); + return applyMapping(MI, BestMapping, RepairPts); } bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. 
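
A brief aside on the fallback protocol this change adopts (the snippet restates the pattern used later in this function; the error message is illustrative):

    // When a GlobalISel pass cannot make progress it either aborts, when the
    // abort mode queried through TargetPassConfig is enabled, or tags the
    // function and returns, so early-exits like the one below can skip it:
    //   if (TPC->isGlobalISelAbortEnabled())
    //     report_fatal_error("Unable to map instruction");
    //   MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
    //   return false;
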
+ if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); const Function *F = MF.getFunction(); Mode SaveOptMode = OptMode; if (F->hasFnAttribute(Attribute::OptimizeNone)) OptMode = Mode::Fast; init(MF); + +#ifndef NDEBUG + // Check that our input is fully legal: we require the function to have the + // Legalized property, so it should be. + // FIXME: This should be in the MachineVerifier, but it can't use the + // LegalizerInfo as it's currently in the separate GlobalISel library. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) { + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) { + if (!TPC->isGlobalISelAbortEnabled()) { + MF.getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return false; + } + std::string ErrStorage; + raw_string_ostream Err(ErrStorage); + Err << "Instruction is not legal: " << MI << '\n'; + report_fatal_error(Err.str()); + } + } + } + } +#endif + // Walk the function and assign register banks to all operands. // Use a RPOT to make sure all registers are assigned before we choose // the best mapping of the current instruction. @@ -554,7 +615,18 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { MII != End;) { // MI might be invalidated by the assignment, so move the // iterator before hand. - assignInstr(*MII++); + MachineInstr &MI = *MII++; + + // Ignore target-specific instructions: they should use proper regclasses. + if (isTargetSpecificOpcode(MI.getOpcode())) + continue; + + if (!assignInstr(MI)) { + if (TPC->isGlobalISelAbortEnabled()) + report_fatal_error("Unable to map instruction"); + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + return false; + } } } OptMode = SaveOptMode; @@ -895,3 +967,20 @@ bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const { return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost && LocalFreq == Cost.LocalFreq; } + +void RegBankSelect::MappingCost::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +void RegBankSelect::MappingCost::print(raw_ostream &OS) const { + if (*this == ImpossibleCost()) { + OS << "impossible"; + return; + } + if (isSaturated()) { + OS << "saturated"; + return; + } + OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index a911225..49d676f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -19,12 +19,15 @@ using namespace llvm; const unsigned RegisterBank::InvalidID = UINT_MAX; -RegisterBank::RegisterBank() : ID(InvalidID), Name(nullptr), Size(0) {} +RegisterBank::RegisterBank(unsigned ID, const char *Name, unsigned Size, + const uint32_t *CoveredClasses) + : ID(ID), Name(Name), Size(Size) { + ContainedRegClasses.resize(200); + ContainedRegClasses.setBitsInMask(CoveredClasses); +} bool RegisterBank::verify(const TargetRegisterInfo &TRI) const { assert(isValid() && "Invalid register bank"); - assert(ContainedRegClasses.size() == TRI.getNumRegClasses() && - "TRI does not match the initialization process?"); for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) { const 
TargetRegisterClass &RC = *TRI.getRegClass(RCId); @@ -72,7 +75,7 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const { return &OtherRB == this; } -void RegisterBank::dump(const TargetRegisterInfo *TRI) const { +LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const { print(dbgs(), /* IsForDebug */ true, TRI); } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index ef8e4f6..da5ab0b 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -32,139 +33,56 @@ using namespace llvm; +STATISTIC(NumPartialMappingsCreated, + "Number of partial mappings dynamically created"); +STATISTIC(NumPartialMappingsAccessed, + "Number of partial mappings dynamically accessed"); +STATISTIC(NumValueMappingsCreated, + "Number of value mappings dynamically created"); +STATISTIC(NumValueMappingsAccessed, + "Number of value mappings dynamically accessed"); +STATISTIC(NumOperandsMappingsCreated, + "Number of operands mappings dynamically created"); +STATISTIC(NumOperandsMappingsAccessed, + "Number of operands mappings dynamically accessed"); + const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; //------------------------------------------------------------------------------ // RegisterBankInfo implementation. 
//------------------------------------------------------------------------------ -RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks) - : NumRegBanks(NumRegBanks) { - RegBanks.reset(new RegisterBank[NumRegBanks]); +RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks, + unsigned NumRegBanks) + : RegBanks(RegBanks), NumRegBanks(NumRegBanks) { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { + assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank"); + assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid"); + } +#endif // NDEBUG +} + +RegisterBankInfo::~RegisterBankInfo() { + for (auto It : MapOfPartialMappings) + delete It.second; + for (auto It : MapOfValueMappings) + delete It.second; } bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { - DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { const RegisterBank &RegBank = getRegBank(Idx); assert(Idx == RegBank.getID() && "ID does not match the index in the array"); - dbgs() << "Verify " << RegBank << '\n'; + DEBUG(dbgs() << "Verify " << RegBank << '\n'); assert(RegBank.verify(TRI) && "RegBank is invalid"); - }); + } +#endif // NDEBUG return true; } -void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) { - DEBUG(dbgs() << "Create register bank: " << ID << " with name \"" << Name - << "\"\n"); - RegisterBank &RegBank = getRegBank(ID); - assert(RegBank.getID() == RegisterBank::InvalidID && - "A register bank should be created only once"); - RegBank.ID = ID; - RegBank.Name = Name; -} - -void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId, - const TargetRegisterInfo &TRI, - bool AddTypeMapping) { - RegisterBank &RB = getRegBank(ID); - unsigned NbOfRegClasses = TRI.getNumRegClasses(); - - DEBUG(dbgs() << "Add coverage for: " << RB << '\n'); - - // Check if RB is underconstruction. - if (!RB.isValid()) - RB.ContainedRegClasses.resize(NbOfRegClasses); - else if (RB.covers(*TRI.getRegClass(RCId))) - // If RB already covers this register class, there is nothing - // to do. - return; - - BitVector &Covered = RB.ContainedRegClasses; - SmallVector<unsigned, 8> WorkList; - - WorkList.push_back(RCId); - Covered.set(RCId); - - unsigned &MaxSize = RB.Size; - do { - unsigned RCId = WorkList.pop_back_val(); - - const TargetRegisterClass &CurRC = *TRI.getRegClass(RCId); - - DEBUG(dbgs() << "Examine: " << TRI.getRegClassName(&CurRC) - << "(Size*8: " << (CurRC.getSize() * 8) << ")\n"); - - // Remember the biggest size in bits. - MaxSize = std::max(MaxSize, CurRC.getSize() * 8); - - // If we have been asked to record the type supported by this - // register bank, do it now. - if (AddTypeMapping) - for (MVT::SimpleValueType SVT : - make_range(CurRC.vt_begin(), CurRC.vt_end())) - recordRegBankForType(getRegBank(ID), SVT); - - // Walk through all sub register classes and push them into the worklist. - bool First = true; - for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid(); - ++It) { - unsigned SubRCId = It.getID(); - if (!Covered.test(SubRCId)) { - if (First) - DEBUG(dbgs() << " Enqueue sub-class: "); - DEBUG(dbgs() << TRI.getRegClassName(TRI.getRegClass(SubRCId)) << ", "); - WorkList.push_back(SubRCId); - // Remember that we saw the sub class. 
- Covered.set(SubRCId); - First = false; - } - } - if (!First) - DEBUG(dbgs() << '\n'); - - // Push also all the register classes that can be accessed via a - // subreg index, i.e., its subreg-class (which is different than - // its subclass). - // - // Note: It would probably be faster to go the other way around - // and have this method add only super classes, since this - // information is available in a more efficient way. However, it - // feels less natural for the client of this APIs plus we will - // TableGen the whole bitset at some point, so compile time for - // the initialization is not very important. - First = true; - for (unsigned SubRCId = 0; SubRCId < NbOfRegClasses; ++SubRCId) { - if (Covered.test(SubRCId)) - continue; - bool Pushed = false; - const TargetRegisterClass *SubRC = TRI.getRegClass(SubRCId); - for (SuperRegClassIterator SuperRCIt(SubRC, &TRI); SuperRCIt.isValid(); - ++SuperRCIt) { - if (Pushed) - break; - for (BitMaskClassIterator It(SuperRCIt.getMask(), TRI); It.isValid(); - ++It) { - unsigned SuperRCId = It.getID(); - if (SuperRCId == RCId) { - if (First) - DEBUG(dbgs() << " Enqueue subreg-class: "); - DEBUG(dbgs() << TRI.getRegClassName(SubRC) << ", "); - WorkList.push_back(SubRCId); - // Remember that we saw the sub class. - Covered.set(SubRCId); - Pushed = true; - First = false; - break; - } - } - } - } - if (!First) - DEBUG(dbgs() << '\n'); - } while (!WorkList.empty()); -} - const RegisterBank * RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { @@ -173,11 +91,9 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, assert(Reg && "NoRegister does not have a register bank"); const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); - if (RegClassOrBank.is<const RegisterBank *>()) - return RegClassOrBank.get<const RegisterBank *>(); - const TargetRegisterClass *RC = - RegClassOrBank.get<const TargetRegisterClass *>(); - if (RC) + if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>()) + return RB; + if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) return &getRegBankFromRegClass(*RC); return nullptr; } @@ -199,10 +115,37 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( return &RegBank; } +const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister( + unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) { + + // If the register already has a class, fallback to MRI::constrainRegClass. + auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + if (RegClassOrBank.is<const TargetRegisterClass *>()) + return MRI.constrainRegClass(Reg, &RC); + + const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); + // Otherwise, all we can do is ensure the bank covers the class, and set it. + if (RB && !RB->covers(RC)) + return nullptr; + + // If nothing was set or the class is simply compatible, set it. + MRI.setRegClass(Reg, &RC); + return &RC; +} + RegisterBankInfo::InstructionMapping RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { + // For copies we want to walk over the operands and try to find one + // that has a register bank since the instruction itself will not get + // us any constraint. + bool isCopyLike = MI.isCopy() || MI.isPHI(); + // For copy like instruction, only the mapping of the definition + // is important. The rest is not constrained. + unsigned NumOperandsForMapping = isCopyLike ? 
1 : MI.getNumOperands(); + RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1, - MI.getNumOperands()); + /*OperandsMapping*/ nullptr, + NumOperandsForMapping); const MachineFunction &MF = *MI.getParent()->getParent(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); @@ -213,14 +156,10 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // Before doing anything complicated check if the mapping is not // directly available. bool CompleteMapping = true; - // For copies we want to walk over the operands and try to find one - // that has a register bank. - bool isCopyLike = MI.isCopy() || MI.isPHI(); - // Remember the register bank for reuse for copy-like instructions. - const RegisterBank *RegBank = nullptr; - // Remember the size of the register for reuse for copy-like instructions. - unsigned RegSize = 0; - for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) { + + SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping); + for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; + ++OpIdx) { const MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) continue; @@ -242,71 +181,147 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // the register bank from the encoding constraints. CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI); if (!CurRegBank) { - // Check if we can deduce the register bank from the type of - // the instruction. - Type *MITy = MI.getType(); - if (MITy) - CurRegBank = getRegBankForType( - MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy); - if (!CurRegBank) - // Use the current assigned register bank. - // That may not make much sense though. - CurRegBank = AltRegBank; - if (!CurRegBank) { - // All our attempts failed, give up. - CompleteMapping = false; - - if (!isCopyLike) - // MI does not carry enough information to guess the mapping. - return InstructionMapping(); - - // For copies, we want to keep interating to find a register - // bank for the other operands if we did not find one yet. - if (RegBank) - break; - continue; - } + // All our attempts failed, give up. + CompleteMapping = false; + + if (!isCopyLike) + // MI does not carry enough information to guess the mapping. + return InstructionMapping(); + continue; } } - RegBank = CurRegBank; - RegSize = getSizeInBits(Reg, MRI, TRI); - Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank); + const ValueMapping *ValMapping = + &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank); + if (isCopyLike) { + OperandsMapping[0] = ValMapping; + CompleteMapping = true; + break; + } + OperandsMapping[OpIdx] = ValMapping; } - if (CompleteMapping) - return Mapping; - - assert(isCopyLike && "We should have bailed on non-copies at this point"); - // For copy like instruction, if none of the operand has a register - // bank avialable, there is nothing we can propagate. - if (!RegBank) + if (isCopyLike && !CompleteMapping) + // No way to deduce the type from what we have. return InstructionMapping(); - // This is a copy-like instruction. - // Propagate RegBank to all operands that do not have a - // mapping yet. - for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) { - const MachineOperand &MO = MI.getOperand(OpIdx); - // Don't assign a mapping for non-reg operands. 
- if (!MO.isReg()) - continue; + assert(CompleteMapping && "Setting an uncomplete mapping"); + Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping)); + return Mapping; +} - // If a mapping already exists, do not touch it. - if (!static_cast<const InstructionMapping *>(&Mapping) - ->getOperandMapping(OpIdx) - .BreakDown.empty()) - continue; +/// Hashing function for PartialMapping. +static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank *RegBank) { + return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0); +} + +/// Overloaded version of hash_value for a PartialMapping. +hash_code +llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) { + return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length, + PartMapping.RegBank); +} + +const RegisterBankInfo::PartialMapping & +RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const { + ++NumPartialMappingsAccessed; + + hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank); + const auto &It = MapOfPartialMappings.find(Hash); + if (It != MapOfPartialMappings.end()) + return *It->second; + + ++NumPartialMappingsCreated; + + const PartialMapping *&PartMapping = MapOfPartialMappings[Hash]; + PartMapping = new PartialMapping{StartIdx, Length, RegBank}; + return *PartMapping; +} + +const RegisterBankInfo::ValueMapping & +RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const { + return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1); +} + +static hash_code +hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown, + unsigned NumBreakDowns) { + if (LLVM_LIKELY(NumBreakDowns == 1)) + return hash_value(*BreakDown); + SmallVector<size_t, 8> Hashes(NumBreakDowns); + for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx) + Hashes.push_back(hash_value(BreakDown[Idx])); + return hash_combine_range(Hashes.begin(), Hashes.end()); +} + +const RegisterBankInfo::ValueMapping & +RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, + unsigned NumBreakDowns) const { + ++NumValueMappingsAccessed; + + hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns); + const auto &It = MapOfValueMappings.find(Hash); + if (It != MapOfValueMappings.end()) + return *It->second; + + ++NumValueMappingsCreated; + + const ValueMapping *&ValMapping = MapOfValueMappings[Hash]; + ValMapping = new ValueMapping{BreakDown, NumBreakDowns}; + return *ValMapping; +} - Mapping.setOperandMapping(OpIdx, RegSize, *RegBank); +template <typename Iterator> +const RegisterBankInfo::ValueMapping * +RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { + + ++NumOperandsMappingsAccessed; + + // The addresses of the value mapping are unique. + // Therefore, we can use them directly to hash the operand mapping. + hash_code Hash = hash_combine_range(Begin, End); + const auto &It = MapOfOperandsMappings.find(Hash); + if (It != MapOfOperandsMappings.end()) + return It->second; + + ++NumOperandsMappingsCreated; + + // Create the array of ValueMapping. + // Note: this array will not hash to this instance of operands + // mapping, because we use the pointer of the ValueMapping + // to hash and we expect them to uniquely identify an instance + // of value mapping. 
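
A sketch of how a target is expected to consume these uniquing helpers (the bank name and operand count are made up; this is not code from the patch):

    // Inside a hypothetical MyTargetRegisterBankInfo::getInstrMapping():
    const ValueMapping &GPR32 = getValueMapping(0, 32, MyGPRBank);
    const ValueMapping *Ops = getOperandsMapping({&GPR32, &GPR32, &GPR32});
    // One interned array describes all three register operands of, say, a
    // 32-bit G_ADD; the InstructionMapping just points at it.
    InstructionMapping Mapping(DefaultMappingID, /*Cost=*/1, Ops,
                               /*NumOperands=*/3);
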
+ ValueMapping *&Res = MapOfOperandsMappings[Hash]; + Res = new ValueMapping[std::distance(Begin, End)]; + unsigned Idx = 0; + for (Iterator It = Begin; It != End; ++It, ++Idx) { + const ValueMapping *ValMap = *It; + if (!ValMap) + continue; + Res[Idx] = *ValMap; } - return Mapping; + return Res; +} + +const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( + const SmallVectorImpl<const RegisterBankInfo::ValueMapping *> &OpdsMapping) + const { + return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); +} + +const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( + std::initializer_list<const RegisterBankInfo::ValueMapping *> OpdsMapping) + const { + return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); } RegisterBankInfo::InstructionMapping RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); - if (Mapping.isValid()) - return Mapping; + RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + if (Mapping.isValid()) + return Mapping; llvm_unreachable("The target must implement this"); } @@ -335,18 +350,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { MachineInstr &MI = OpdMapper.getMI(); DEBUG(dbgs() << "Applying default-like mapping\n"); - for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; - ++OpIdx) { + for (unsigned OpIdx = 0, + EndIdx = OpdMapper.getInstrMapping().getNumOperands(); + OpIdx != EndIdx; ++OpIdx) { DEBUG(dbgs() << "OpIdx " << OpIdx); MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { DEBUG(dbgs() << " is not a register, nothing to be done\n"); continue; } - assert( - OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() == - 1 && - "This mapping is too complex for this function"); + assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns == + 1 && + "This mapping is too complex for this function"); iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); if (NewRegs.begin() == NewRegs.end()) { @@ -369,7 +384,8 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, // get the size of that register class. RC = TRI.getMinimalPhysRegClass(Reg); } else { - unsigned RegSize = MRI.getSize(Reg); + LLT Ty = MRI.getType(Reg); + unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; // If Reg is not a generic register, query the register class to // get its size. if (RegSize) @@ -384,7 +400,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, //------------------------------------------------------------------------------ // Helper classes implementation. //------------------------------------------------------------------------------ -void RegisterBankInfo::PartialMapping::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const { print(dbgs()); dbgs() << '\n'; } @@ -392,7 +408,7 @@ void RegisterBankInfo::PartialMapping::dump() const { bool RegisterBankInfo::PartialMapping::verify() const { assert(RegBank && "Register bank not set"); assert(Length && "Empty mapping"); - assert((StartIdx < getHighBitIdx()) && "Overflow, switch to APInt?"); + assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?"); // Check if the minimum width fits into RegBank. 
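
For orientation, an invented example of the PartialMapping fields being verified here, and why the overflow check above now accepts equality:

    // A 64-bit value split across two 32-bit registers is described as:
    //   { StartIdx = 0,  Length = 32, RegBank = &GPRBank }  // bits [0, 31]
    //   { StartIdx = 32, Length = 32, RegBank = &GPRBank }  // bits [32, 63]
    // With getHighBitIdx() == StartIdx + Length - 1, a single-bit mapping has
    // StartIdx == getHighBitIdx(), so the assert must use '<=' rather than '<'.
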
assert(RegBank->getSize() >= Length && "Register bank too small for Mask"); return true; @@ -406,10 +422,10 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { OS << "nullptr"; } -bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { - assert(!BreakDown.empty() && "Value mapped nowhere?!"); +bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { + assert(NumBreakDowns && "Value mapped nowhere?!"); unsigned OrigValueBitWidth = 0; - for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) { + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { // Check that each register bank is big enough to hold the partial value: // this check is done by PartialMapping::verify assert(PartMap.verify() && "Partial mapping is invalid"); @@ -418,9 +434,10 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { OrigValueBitWidth = std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1); } - assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match"); + assert(OrigValueBitWidth >= MeaningfulBitWidth && + "Meaningful bits not covered by the mapping"); APInt ValueMask(OrigValueBitWidth, 0); - for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) { + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { // Check that the union of the partial mappings covers the whole value, // without overlaps. // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1. @@ -434,15 +451,15 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { return true; } -void RegisterBankInfo::ValueMapping::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const { print(dbgs()); dbgs() << '\n'; } void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { - OS << "#BreakDown: " << BreakDown.size() << " "; + OS << "#BreakDown: " << NumBreakDowns << " "; bool IsFirst = true; - for (const PartialMapping &PartMap : BreakDown) { + for (const PartialMapping &PartMap : *this) { if (!IsFirst) OS << ", "; OS << '[' << PartMap << ']'; @@ -450,21 +467,13 @@ void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { } } -void RegisterBankInfo::InstructionMapping::setOperandMapping( - unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) { - // Build the value mapping. - assert(MaskSize <= RegBank.getSize() && "Register bank is too small"); - - // Create the mapping object. - getOperandMapping(OpIdx).BreakDown.push_back( - PartialMapping(0, MaskSize, RegBank)); -} - bool RegisterBankInfo::InstructionMapping::verify( const MachineInstr &MI) const { // Check that all the register operands are properly mapped. // Check the constructor invariant. - assert(NumOperands == MI.getNumOperands() && + // For PHI, we only care about mapping the definition. + assert(NumOperands == + ((MI.isCopy() || MI.isPHI()) ? 
1 : MI.getNumOperands()) && "NumOperands must match, see constructor"); assert(MI.getParent() && MI.getParent()->getParent() && "MI must be connected to a MachineFunction"); @@ -473,16 +482,18 @@ bool RegisterBankInfo::InstructionMapping::verify( for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { const MachineOperand &MO = MI.getOperand(Idx); - const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); - (void)MOMapping; if (!MO.isReg()) { - assert(MOMapping.BreakDown.empty() && + assert(!getOperandMapping(Idx).isValid() && "We should not care about non-reg mapping"); continue; } unsigned Reg = MO.getReg(); if (!Reg) continue; + assert(getOperandMapping(Idx).isValid() && + "We must have a mapping for reg operands"); + const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); + (void)MOMapping; // Register size in bits. // This size must match what the mapping expects. assert(MOMapping.verify(getSizeInBits( @@ -492,7 +503,7 @@ bool RegisterBankInfo::InstructionMapping::verify( return true; } -void RegisterBankInfo::InstructionMapping::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const { print(dbgs()); dbgs() << '\n'; } @@ -514,18 +525,16 @@ RegisterBankInfo::OperandsMapper::OperandsMapper( MachineInstr &MI, const InstructionMapping &InstrMapping, MachineRegisterInfo &MRI) : MRI(MRI), MI(MI), InstrMapping(InstrMapping) { - unsigned NumOpds = MI.getNumOperands(); - OpToNewVRegIdx.reset(new int[NumOpds]); - std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds], - OperandsMapper::DontKnowIdx); + unsigned NumOpds = InstrMapping.getNumOperands(); + OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx); assert(InstrMapping.verify(MI) && "Invalid mapping for MI"); } iterator_range<SmallVectorImpl<unsigned>::iterator> RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); unsigned NumPartialVal = - getInstrMapping().getOperandMapping(OpIdx).BreakDown.size(); + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; int StartIdx = OpToNewVRegIdx[OpIdx]; if (StartIdx == OperandsMapper::DontKnowIdx) { @@ -559,16 +568,15 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, } void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx = getVRegsMem(OpIdx); - const SmallVectorImpl<PartialMapping> &PartMapList = - getInstrMapping().getOperandMapping(OpIdx).BreakDown; - SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin(); + const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx); + const PartialMapping *PartMap = ValMapping.begin(); for (unsigned &NewVReg : NewVRegsForOpIdx) { - assert(PartMap != PartMapList.end() && "Out-of-bound access"); + assert(PartMap != ValMapping.end() && "Out-of-bound access"); assert(NewVReg == 0 && "Register has already been created"); - NewVReg = MRI.createGenericVirtualRegister(PartMap->Length); + NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length)); MRI.setRegBank(NewVReg, *PartMap->RegBank); ++PartMap; } @@ -577,8 +585,8 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { void 
RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, unsigned PartialMapIdx, unsigned NewVReg) { - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); - assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() > + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns > PartialMapIdx && "Out-of-bound access for partial mapping"); // Make sure the memory is initialized for that operand. @@ -592,14 +600,14 @@ iterator_range<SmallVectorImpl<unsigned>::const_iterator> RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, bool ForDebug) const { (void)ForDebug; - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); int StartIdx = OpToNewVRegIdx[OpIdx]; if (StartIdx == OperandsMapper::DontKnowIdx) return make_range(NewVRegs.end(), NewVRegs.end()); unsigned PartMapSize = - getInstrMapping().getOperandMapping(OpIdx).BreakDown.size(); + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; SmallVectorImpl<unsigned>::const_iterator End = getNewVRegsEnd(StartIdx, PartMapSize); iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res = @@ -611,14 +619,14 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, return Res; } -void RegisterBankInfo::OperandsMapper::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const { print(dbgs(), true); dbgs() << '\n'; } void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, bool ForDebug) const { - unsigned NumOpds = getMI().getNumOperands(); + unsigned NumOpds = getInstrMapping().getNumOperands(); if (ForDebug) { OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n'; // Print out the internal state of the index table. diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp new file mode 100644 index 0000000..e500918 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -0,0 +1,45 @@ +//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file This file implements the utility functions used by the GlobalISel +/// pipeline. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "globalisel-utils" + +using namespace llvm; + +unsigned llvm::constrainOperandRegClass( + const MachineFunction &MF, const TargetRegisterInfo &TRI, + MachineRegisterInfo &MRI, const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, + unsigned Reg, unsigned OpIdx) { + // Assume physical registers are properly constrained. 
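
A sketch of the intended call site for this new helper (hypothetical selector loop, not part of the patch):

    // After choosing a target opcode for instruction I, constrain every
    // virtual register operand to a class the encoding accepts; the helper
    // inserts a COPY when the existing class or bank cannot be narrowed.
    for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
      MachineOperand &MO = I.getOperand(OpI);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
        MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I,
                                           I.getDesc(), MO.getReg(), OpI));
    }
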
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "PhysReg not implemented"); + + const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); + + if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) { + unsigned NewReg = MRI.createVirtualRegister(RegClass); + BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + return NewReg; + } + + return Reg; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index 8c760b7..1ea5349 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -182,9 +182,7 @@ namespace { bool runOnFunction(Function &F) override; bool doFinalization(Module &M) override; - const char *getPassName() const override { - return "Merge internal globals"; - } + StringRef getPassName() const override { return "Merge internal globals"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -434,6 +432,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, std::vector<Type*> Tys; std::vector<Constant*> Inits; + bool HasExternal = false; + StringRef FirstExternalName; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); MergedSize += DL.getTypeAllocSize(Ty); @@ -442,19 +442,46 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + + if (Globals[j]->hasExternalLinkage() && !HasExternal) { + HasExternal = true; + FirstExternalName = Globals[j]->getName(); + } } + // If merged variables doesn't have external linkage, we needn't to expose + // the symbol after merging. + GlobalValue::LinkageTypes Linkage = HasExternal + ? GlobalValue::ExternalLinkage + : GlobalValue::InternalLinkage; StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - GlobalVariable *MergedGV = new GlobalVariable( - M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit, - "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + // On Darwin external linkage needs to be preserved, otherwise + // dsymutil cannot preserve the debug info for the merged + // variables. If they have external linkage, use the symbol name + // of the first variable merged as the suffix of global symbol + // name. This avoids a link-time naming conflict for the + // _MergedGlobals symbols. + Twine MergedName = + (IsMachO && HasExternal) + ? "_MergedGlobals_" + FirstExternalName + : "_MergedGlobals"; + auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage; + auto *MergedGV = new GlobalVariable( + M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); + + const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); + // Copy metadata while adjusting any debug info metadata by the original + // global's offset within the merged global. 
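
An invented before/after picture of what the surrounding merge code produces; the element offsets come from the StructLayout queried above:

    // Three internal i32 globals a, b, c merged at offsets 0, 4 and 8:
    //   @_MergedGlobals = internal global { i32, i32, i32 } { ... }
    //   use of @b  ==>  getelementptr ({ i32, i32, i32 }, @_MergedGlobals, 0, 1)
    // copyMetadata() below shifts each variable's debug info by
    // getElementOffset(idx), so debuggers still find b at +4 inside the blob.
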
+ MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx)); + Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, idx), @@ -498,22 +525,18 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) { void GlobalMerge::setMustKeepGlobalVariables(Module &M) { collectUsedGlobalVariables(M); - for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; - ++IFn) { - for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); - IBB != IEndBB; ++IBB) { - // Follow the invoke link to find the landing pad instruction - const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); - if (!II) continue; - - const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); - // Look for globals in the clauses of the landing pad instruction - for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); - Idx != NumClauses; ++Idx) + for (Function &F : M) { + for (BasicBlock &BB : F) { + Instruction *Pad = BB.getFirstNonPHI(); + if (!Pad->isEHPad()) + continue; + + // Keep globals used by landingpads and catchpads. + for (const Use &U : Pad->operands()) { if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>(LPInst->getClause(Idx) - ->stripPointerCasts())) + dyn_cast<GlobalVariable>(U->stripPointerCasts())) MustKeepGlobalVariables.insert(GV); + } } } } diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index d225162..b9f3d86 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -58,6 +59,8 @@ static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond", + cl::init(false), cl::Hidden); static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", cl::init(true), cl::Hidden); @@ -68,6 +71,7 @@ STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); @@ -82,10 +86,12 @@ namespace { ICTriangleRev, // Same as ICTriangle, but true path rev condition. ICTriangleFalse, // Same as ICTriangle, but on the false path. ICTriangle, // BB is entry of a triangle sub-CFG. - ICDiamond // BB is entry of a diamond sub-CFG. + ICDiamond, // BB is entry of a diamond sub-CFG. + ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a + // common tail that can be shared. }; - /// BBInfo - One per MachineBasicBlock, this is used to cache the result + /// One per MachineBasicBlock, this is used to cache the result /// if-conversion feasibility analysis. This includes results from /// TargetInstrInfo::analyzeBranch() (i.e. 
TBB, FBB, and Cond), and its /// classification, and common tail block of its successors (if it's a @@ -114,6 +120,7 @@ namespace { bool IsAnalyzed : 1; bool IsEnqueued : 1; bool IsBrAnalyzable : 1; + bool IsBrReversible : 1; bool HasFallThrough : 1; bool IsUnpredicable : 1; bool CannotBeCopied : 1; @@ -128,13 +135,14 @@ namespace { SmallVector<MachineOperand, 4> Predicate; BBInfo() : IsDone(false), IsBeingAnalyzed(false), IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), - HasFallThrough(false), IsUnpredicable(false), - CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + IsBrReversible(false), HasFallThrough(false), + IsUnpredicable(false), CannotBeCopied(false), + ClobbersPred(false), NonPredSize(0), ExtraCost(0), + ExtraCost2(0), BB(nullptr), TrueBB(nullptr), FalseBB(nullptr) {} }; - /// IfcvtToken - Record information about pending if-conversions to attempt: + /// Record information about pending if-conversions to attempt: /// BBI - Corresponding BBInfo. /// Kind - Type of block. See IfcvtKind. /// NeedSubsumption - True if the to-be-predicated BB has already been @@ -148,15 +156,19 @@ namespace { struct IfcvtToken { BBInfo &BBI; IfcvtKind Kind; - bool NeedSubsumption; unsigned NumDups; unsigned NumDups2; - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) - : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + bool NeedSubsumption : 1; + bool TClobbersPred : 1; + bool FClobbersPred : 1; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, + bool tc = false, bool fc = false) + : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), + TClobbersPred(tc), FClobbersPred(fc) {} }; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by - /// basic block number. + /// Results of if-conversion feasibility analysis indexed by basic block + /// number. 
std::vector<BBInfo> BBAnalysis; TargetSchedModel SchedModel; @@ -172,11 +184,11 @@ namespace { bool PreRegAlloc; bool MadeChange; int FnNum; - std::function<bool(const Function &)> PredicateFtor; + std::function<bool(const MachineFunction &)> PredicateFtor; public: static char ID; - IfConverter(std::function<bool(const Function &)> Ftor = nullptr) + IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr) : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } @@ -191,31 +203,58 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: - bool ReverseBranchCondition(BBInfo &BBI); + bool reverseBranchCondition(BBInfo &BBI) const; bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, BranchProbability Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const; + bool CountDuplicatedInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + unsigned &Dups1, unsigned &Dups2, + MachineBasicBlock &TBB, MachineBasicBlock &FBB, + bool SkipUnconditionalBranches) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, - unsigned &Dups1, unsigned &Dups2) const; - void ScanInstructions(BBInfo &BBI); - void AnalyzeBlock(MachineBasicBlock *MBB, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; + bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; + void AnalyzeBranches(BBInfo &BBI); + void ScanInstructions(BBInfo &BBI, + MachineBasicBlock::iterator &Begin, + MachineBasicBlock::iterator &End, + bool BranchUnpredicable = false) const; + bool RescanInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + BBInfo &TrueBBI, BBInfo &FalseBBI) const; + void AnalyzeBlock(MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, - bool isTriangle = false, bool RevBranch = false); + bool isTriangle = false, bool RevBranch = false, + bool hasCommonTail = false); void AnalyzeBlocks(MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); - void InvalidatePreds(MachineBasicBlock *BB); + void InvalidatePreds(MachineBasicBlock &MBB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveBranch, bool MergeAddEdges); bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2); + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); + bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, @@ -242,12 +281,12 @@ namespace { Prediction); } - // 
blockAlwaysFallThrough - Block ends without a terminator. + /// Returns true if Block ends without a terminator. bool blockAlwaysFallThrough(BBInfo &BBI) const { return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; } - // IfcvtTokenCmp - Used to sort if-conversion candidates. + /// Used to sort if-conversion candidates. static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1, const std::unique_ptr<IfcvtToken> &C2) { int Incr1 = (C1->Kind == ICDiamond) @@ -282,8 +321,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || - (PredicateFtor && !PredicateFtor(*MF.getFunction()))) + if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) return false; const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -402,11 +440,26 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); - RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) ++NumDiamonds; break; } + case ICForkedDiamond: { + if (DisableForkedDiamond) break; + DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#" + << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumForkedDiamonds; + break; + } } Change |= RetVal; @@ -435,46 +488,42 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } -/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given -/// its 'true' successor. +/// BB has a fallthrough. Find its 'false' successor given its 'true' successor. static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB) { - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - E = BB->succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock *SuccBB : BB->successors()) { if (SuccBB != TrueBB) return SuccBB; } return nullptr; } -/// ReverseBranchCondition - Reverse the condition of the end of the block -/// branch. Swap block's 'true' and 'false' successors. -bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { +/// Reverse the condition of the end of the block branch. Swap block's 'true' +/// and 'false' successors. +bool IfConverter::reverseBranchCondition(BBInfo &BBI) const { DebugLoc dl; // FIXME: this is nowhere - if (!TII->ReverseBranchCondition(BBI.BrCond)) { - TII->RemoveBranch(*BBI.BB); - TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); + if (!TII->reverseBranchCondition(BBI.BrCond)) { + TII->removeBranch(*BBI.BB); + TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); std::swap(BBI.TrueBB, BBI.FalseBB); return true; } return false; } -/// getNextBlock - Returns the next block in the function blocks ordering. If -/// it is the end, returns NULL. 
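// Editorial sketch (not part of this diff): TargetInstrInfo's
// reverseBranchCondition returns false on success, which is why
// reverseBranchCondition(BBInfo&) above negates the result before swapping
// TrueBB/FalseBB. A minimal caller using the same convention:
static bool tryReverseCond(const TargetInstrInfo *TII,
                           SmallVectorImpl<MachineOperand> &Cond) {
  // Work on a copy so a failed reversal leaves Cond untouched.
  SmallVector<MachineOperand, 4> Tmp(Cond.begin(), Cond.end());
  if (TII->reverseBranchCondition(Tmp))
    return false;                      // target could not reverse this condition
  Cond.assign(Tmp.begin(), Tmp.end());
  return true;
}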
-static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { - MachineFunction::iterator I = BB->getIterator(); - MachineFunction::iterator E = BB->getParent()->end(); +/// Returns the next block in the function blocks ordering. If it is the end, +/// returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock &MBB) { + MachineFunction::iterator I = MBB.getIterator(); + MachineFunction::iterator E = MBB.getParent()->end(); if (++I == E) return nullptr; return &*I; } -/// ValidSimple - Returns true if the 'true' block (along with its -/// predecessor) forms a valid simple shape for ifcvt. It also returns the -/// number of instructions that the ifcvt would need to duplicate if performed -/// in Dups. +/// Returns true if the 'true' block (along with its predecessor) forms a valid +/// simple shape for ifcvt. It also returns the number of instructions that the +/// ifcvt would need to duplicate if performed in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, BranchProbability Prediction) const { Dups = 0; @@ -495,12 +544,11 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, return true; } -/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along -/// with their common predecessor) forms a valid triangle shape for ifcvt. -/// If 'FalseBranch' is true, it checks if 'true' block's false branch -/// branches to the 'false' block rather than the other way around. It also -/// returns the number of instructions that the ifcvt would need to duplicate -/// if performed in 'Dups'. +/// Returns true if the 'true' and 'false' blocks (along with their common +/// predecessor) forms a valid triangle shape for ifcvt. If 'FalseBranch' is +/// true, it checks if 'true' block's false branch branches to the 'false' block +/// rather than the other way around. It also returns the number of instructions +/// that the ifcvt would need to duplicate if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const { @@ -540,122 +588,353 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, return TExit && TExit == FalseBBI.BB; } -/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along -/// with their common predecessor) forms a valid diamond shape for ifcvt. -bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, - unsigned &Dups1, unsigned &Dups2) const { - Dups1 = Dups2 = 0; - if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || - FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) - return false; - - MachineBasicBlock *TT = TrueBBI.TrueBB; - MachineBasicBlock *FT = FalseBBI.TrueBB; - - if (!TT && blockAlwaysFallThrough(TrueBBI)) - TT = getNextBlock(TrueBBI.BB); - if (!FT && blockAlwaysFallThrough(FalseBBI)) - FT = getNextBlock(FalseBBI.BB); - if (TT != FT) - return false; - if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) - return false; - if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) - return false; +/// Shrink the provided inclusive range by one instruction. +/// If the range was one instruction (\p It == \p Begin), It is not modified, +/// but \p Empty is set to true. +static inline void shrinkInclusiveRange( + MachineBasicBlock::iterator &Begin, + MachineBasicBlock::iterator &It, + bool &Empty) { + if (It == Begin) + Empty = true; + else + It--; +} - // FIXME: Allow true block to have an early exit? 
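// Editorial sketch (not part of this diff): shrinkInclusiveRange treats
// [Begin, It] as an inclusive range and sets Empty instead of ever moving It
// before Begin. A typical backwards walk over such a range looks like this
// (visit() is a placeholder):
static void walkBackwardsSketch(MachineBasicBlock &MBB) {
  if (MBB.empty())
    return;
  MachineBasicBlock::iterator Begin = MBB.begin();
  MachineBasicBlock::iterator It = std::prev(MBB.end()); // last instruction
  bool Empty = false;
  while (!Empty) {
    // visit(*It);
    shrinkInclusiveRange(Begin, It, Empty); // stops once It == Begin
  }
}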
- if (TrueBBI.FalseBB || FalseBBI.FalseBB || - (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) - return false; +/// Count duplicated instructions and move the iterators to show where they +/// are. +/// @param TIB True Iterator Begin +/// @param FIB False Iterator Begin +/// These two iterators initially point to the first instruction of the two +/// blocks, and finally point to the first non-shared instruction. +/// @param TIE True Iterator End +/// @param FIE False Iterator End +/// These two iterators initially point to End() for the two blocks() and +/// finally point to the first shared instruction in the tail. +/// Upon return [TIB, TIE), and [FIB, FIE) mark the un-duplicated portions of +/// two blocks. +/// @param Dups1 count of duplicated instructions at the beginning of the 2 +/// blocks. +/// @param Dups2 count of duplicated instructions at the end of the 2 blocks. +/// @param SkipUnconditionalBranches if true, Don't make sure that +/// unconditional branches at the end of the blocks are the same. True is +/// passed when the blocks are analyzable to allow for fallthrough to be +/// handled. +/// @return false if the shared portion prevents if conversion. +bool IfConverter::CountDuplicatedInstructions( + MachineBasicBlock::iterator &TIB, + MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, + MachineBasicBlock::iterator &FIE, + unsigned &Dups1, unsigned &Dups2, + MachineBasicBlock &TBB, MachineBasicBlock &FBB, + bool SkipUnconditionalBranches) const { - // Count duplicate instructions at the beginning of the true and false blocks. - MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); - MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); - MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TIB->isDebugValue()) { - while (TIB != TIE && TIB->isDebugValue()) - ++TIB; - if (TIB == TIE) - break; - } - if (FIB->isDebugValue()) { - while (FIB != FIE && FIB->isDebugValue()) - ++FIB; - if (FIB == FIE) - break; - } + TIB = skipDebugInstructionsForward(TIB, TIE); + if(TIB == TIE) + break; + FIB = skipDebugInstructionsForward(FIB, FIE); + if(FIB == FIE) + break; if (!TIB->isIdenticalTo(*FIB)) break; - ++Dups1; + // A pred-clobbering instruction in the shared portion prevents + // if-conversion. + std::vector<MachineOperand> PredDefs; + if (TII->DefinesPredicate(*TIB, PredDefs)) + return false; + // If we get all the way to the branch instructions, don't count them. + if (!TIB->isBranch()) + ++Dups1; ++TIB; ++FIB; } - // Now, in preparation for counting duplicate instructions at the ends of the - // blocks, move the end iterators up past any branch instructions. - // If both blocks are returning don't skip the branches, since they will - // likely be both identical return instructions. In such cases the return - // can be left unpredicated. // Check for already containing all of the block. if (TIB == TIE || FIB == FIE) return true; + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. 
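// Editorial sketch (not part of this diff): skipDebugInstructionsForward,
// used in the prefix-counting loop above, advances an iterator past
// dbg_value instructions without walking past End, which is why the loop
// re-checks TIB == TIE / FIB == FIE after each call. Open-coded equivalent,
// for illustration only:
static MachineBasicBlock::iterator
skipDebugForwardSketch(MachineBasicBlock::iterator It,
                       MachineBasicBlock::iterator End) {
  while (It != End && It->isDebugValue())
    ++It;
  return It;
}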
--TIE; --FIE; - if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) { - while (TIE != TIB && TIE->isBranch()) - --TIE; - while (FIE != FIB && FIE->isBranch()) - --FIE; + + // After this point TIB and TIE define an inclusive range, which means that + // TIB == TIE is true when there is one more instruction to consider, not at + // the end. Because we may not be able to go before TIB, we need a flag to + // indicate a completely empty range. + bool TEmpty = false, FEmpty = false; + + // Upon exit TIE and FIE will both point at the last non-shared instruction. + // They need to be moved forward to point past the last non-shared + // instruction if the range they delimit is non-empty. + auto IncrementEndIteratorsOnExit = make_scope_exit([&]() { + if (!TEmpty) + ++TIE; + if (!FEmpty) + ++FIE; + }); + + if (!TBB.succ_empty() || !FBB.succ_empty()) { + if (SkipUnconditionalBranches) { + while (!TEmpty && TIE->isUnconditionalBranch()) + shrinkInclusiveRange(TIB, TIE, TEmpty); + while (!FEmpty && FIE->isUnconditionalBranch()) + shrinkInclusiveRange(FIB, FIE, FEmpty); + } } // If Dups1 includes all of a block, then don't count duplicate // instructions at the end of the blocks. - if (TIB == TIE || FIB == FIE) + if (TEmpty || FEmpty) return true; // Count duplicate instructions at the ends of the blocks. - while (TIE != TIB && FIE != FIB) { + while (!TEmpty && !FEmpty) { // Skip dbg_value instructions. These do not count. - if (TIE->isDebugValue()) { - while (TIE != TIB && TIE->isDebugValue()) - --TIE; - if (TIE == TIB) - break; + TIE = skipDebugInstructionsBackward(TIE, TIB); + FIE = skipDebugInstructionsBackward(FIE, FIB); + TEmpty = TIE == TIB && TIE->isDebugValue(); + FEmpty = FIE == FIB && FIE->isDebugValue(); + if (TEmpty || FEmpty) + break; + if (!TIE->isIdenticalTo(*FIE)) + break; + // We have to verify that any branch instructions are the same, and then we + // don't count them toward the # of duplicate instructions. + if (!TIE->isBranch()) + ++Dups2; + shrinkInclusiveRange(TIB, TIE, TEmpty); + shrinkInclusiveRange(FIB, FIE, FEmpty); + } + return true; +} + +/// RescanInstructions - Run ScanInstructions on a pair of blocks. +/// @param TIB - True Iterator Begin, points to first non-shared instruction +/// @param FIB - False Iterator Begin, points to first non-shared instruction +/// @param TIE - True Iterator End, points past last non-shared instruction +/// @param FIE - False Iterator End, points past last non-shared instruction +/// @param TrueBBI - BBInfo to update for the true block. +/// @param FalseBBI - BBInfo to update for the false block. +/// @returns - false if either block cannot be predicated or if both blocks end +/// with a predicate-clobbering instruction. 
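// Editorial sketch (not part of this diff): make_scope_exit (from
// llvm/ADT/ScopeExit.h) runs its callback when the guard is destroyed; the
// code above uses it to convert TIE/FIE back from inclusive to half-open
// iterators on every exit path, including early returns. The bare pattern:
static void scopeExitSketch() {
  bool Converted = false;
  {
    auto Cleanup = make_scope_exit([&] { Converted = true; });
    // ... any early return or break here still runs the callback ...
  } // Cleanup fires here at the latest; Converted is now true.
  (void)Converted;
}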
+bool IfConverter::RescanInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + BBInfo &TrueBBI, BBInfo &FalseBBI) const { + bool BranchUnpredicable = true; + TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false; + ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable); + if (TrueBBI.IsUnpredicable) + return false; + ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable); + if (FalseBBI.IsUnpredicable) + return false; + if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred) + return false; + return true; +} + +#ifndef NDEBUG +static void verifySameBranchInstructions( + MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2) { + MachineBasicBlock::iterator B1 = MBB1->begin(); + MachineBasicBlock::iterator B2 = MBB2->begin(); + MachineBasicBlock::iterator E1 = std::prev(MBB1->end()); + MachineBasicBlock::iterator E2 = std::prev(MBB2->end()); + bool Empty1 = false, Empty2 = false; + while (!Empty1 && !Empty2) { + E1 = skipDebugInstructionsBackward(E1, B1); + E2 = skipDebugInstructionsBackward(E2, B2); + Empty1 = E1 == B1 && E1->isDebugValue(); + Empty2 = E2 == B2 && E2->isDebugValue(); + + if (Empty1 && Empty2) + break; + + if (Empty1) { + assert(!E2->isBranch() && "Branch mis-match, one block is empty."); + break; } - if (FIE->isDebugValue()) { - while (FIE != FIB && FIE->isDebugValue()) - --FIE; - if (FIE == FIB) - break; + if (Empty2) { + assert(!E1->isBranch() && "Branch mis-match, one block is empty."); + break; } - if (!TIE->isIdenticalTo(*FIE)) + + if (E1->isBranch() || E2->isBranch()) + assert(E1->isIdenticalTo(*E2) && + "Branch mis-match, branch instructions don't match."); + else break; - ++Dups2; - --TIE; - --FIE; + shrinkInclusiveRange(B1, E1, Empty1); + shrinkInclusiveRange(B2, E2, Empty2); + } +} +#endif + +/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) form a diamond if a common tail block is +/// extracted. +/// While not strictly a diamond, this pattern would form a diamond if +/// tail-merging had merged the shared tails. +/// EBB +/// _/ \_ +/// | | +/// TBB FBB +/// / \ / \ +/// FalseBB TrueBB FalseBB +/// Currently only handles analyzable branches. +/// Specifically excludes actual diamonds to avoid overlap. +bool IfConverter::ValidForkedDiamond( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable) + return false; + // Don't IfConvert blocks that can't be folded into their predecessor. + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // This function is specifically looking for conditional tails, as + // unconditional tails are already handled by the standard diamond case. + if (TrueBBI.BrCond.size() == 0 || + FalseBBI.BrCond.size() == 0) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *TF = TrueBBI.FalseBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + MachineBasicBlock *FF = FalseBBI.FalseBB; + + if (!TT) + TT = getNextBlock(*TrueBBI.BB); + if (!TF) + TF = getNextBlock(*TrueBBI.BB); + if (!FT) + FT = getNextBlock(*FalseBBI.BB); + if (!FF) + FF = getNextBlock(*FalseBBI.BB); + + if (!TT || !TF) + return false; + + // Check successors. If they don't match, bail. 
+ if (!((TT == FT && TF == FF) || (TF == FT && TT == FF))) + return false; + + bool FalseReversed = false; + if (TF == FT && TT == FF) { + // If the branches are opposing, but we can't reverse, don't do it. + if (!FalseBBI.IsBrReversible) + return false; + FalseReversed = true; + reverseBranchCondition(FalseBBI); } + auto UnReverseOnExit = make_scope_exit([&]() { + if (FalseReversed) + reverseBranchCondition(FalseBBI); + }); + + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + /* SkipUnconditionalBranches */ true)) + return false; + + TrueBBICalc.BB = TrueBBI.BB; + FalseBBICalc.BB = FalseBBI.BB; + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) + return false; + // The size is used to decide whether to if-convert, and the shared portions + // are subtracted off. Because of the subtraction, we just use the size that + // was calculated by the original ScanInstructions, as it is correct. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; return true; } -/// ScanInstructions - Scan all the instructions in the block to determine if -/// the block is predicable. In most cases, that means all the instructions -/// in the block are isPredicable(). Also checks if the block contains any -/// instruction which can clobber a predicate (e.g. condition code register). -/// If so, the block is not predicable unless it's the last instruction. -void IfConverter::ScanInstructions(BBInfo &BBI) { +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. +bool IfConverter::ValidDiamond( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(*TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(*FalseBBI.BB); + if (TT != FT) + return false; + if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB) + return false; + + // Count duplicate instructions at the beginning and end of the true and + // false blocks. + // Skip unconditional branches only if we are considering an analyzable + // diamond. Otherwise the branches must be the same. 
+ bool SkipUnconditionalBranches = + TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable; + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + SkipUnconditionalBranches)) + return false; + + TrueBBICalc.BB = TrueBBI.BB; + FalseBBICalc.BB = FalseBBI.BB; + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) + return false; + // The size is used to decide whether to if-convert, and the shared portions + // are subtracted off. Because of the subtraction, we just use the size that + // was calculated by the original ScanInstructions, as it is correct. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; + return true; +} + +/// AnalyzeBranches - Look at the branches at the end of a block to determine if +/// the block is predicable. +void IfConverter::AnalyzeBranches(BBInfo &BBI) { if (BBI.IsDone) return; - bool AlreadyPredicated = !BBI.Predicate.empty(); - // First analyze the end of BB branches. BBI.TrueBB = BBI.FalseBB = nullptr; BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + BBI.IsBrReversible = (RevCond.size() == 0) || + !TII->reverseBranchCondition(RevCond); BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { @@ -666,16 +945,29 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (!BBI.FalseBB) { // Malformed bcc? True and false blocks are the same? BBI.IsUnpredicable = true; - return; } } +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI, + MachineBasicBlock::iterator &Begin, + MachineBasicBlock::iterator &End, + bool BranchUnpredicable) const { + if (BBI.IsDone || BBI.IsUnpredicable) + return; + + bool AlreadyPredicated = !BBI.Predicate.empty(); - // Then scan all the instructions. BBI.NonPredSize = 0; BBI.ExtraCost = 0; BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; - for (auto &MI : *BBI.BB) { + for (MachineInstr &MI : make_range(Begin, End)) { if (MI.isDebugValue()) continue; @@ -715,6 +1007,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(MI); bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch(); + if (BranchUnpredicable && MI.isBranch()) { + BBI.IsUnpredicable = true; + return; + } + // A conditional branch is not predicable, but it may be eliminated. if (isCondBr) continue; @@ -756,13 +1053,24 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { } } -/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be -/// predicated by the specified predicate. +/// Determine if the block is a suitable candidate to be predicated by the +/// specified predicate. 
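// Editorial sketch (not part of this diff): IsBrReversible, set in
// AnalyzeBranches above, is computed on a scratch copy of the branch
// condition so the stored BrCond is never mutated when the target cannot
// reverse it. The same pattern in isolation:
static bool isReversibleSketch(const TargetInstrInfo *TII,
                               const SmallVectorImpl<MachineOperand> &BrCond) {
  SmallVector<MachineOperand, 4> RevCond(BrCond.begin(), BrCond.end());
  // An unconditional branch (empty condition) is trivially "reversible".
  return RevCond.empty() || !TII->reverseBranchCondition(RevCond);
}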
+/// @param BBI BBInfo for the block to check +/// @param Pred Predicate array for the branch that leads to BBI +/// @param isTriangle true if the Analysis is for a triangle +/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false +/// case +/// @param hasCommonTail true if BBI shares a tail with a sibling block that +/// contains any instruction that would make the block unpredicable. bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred, - bool isTriangle, bool RevBranch) { + bool isTriangle, bool RevBranch, + bool hasCommonTail) { // If the block is dead or unpredicable, then it cannot be predicated. - if (BBI.IsDone || BBI.IsUnpredicable) + // Two blocks may share a common unpredicable tail, but this doesn't prevent + // them from being if-converted. The non-shared portion is assumed to have + // been checked + if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail)) return false; // If it is already predicated but we couldn't analyze its terminator, the @@ -776,7 +1084,7 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; - if (BBI.BrCond.size()) { + if (!hasCommonTail && BBI.BrCond.size()) { if (!isTriangle) return false; @@ -784,10 +1092,10 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); if (RevBranch) { - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) return false; } - if (TII->ReverseBranchCondition(RevPred) || + if (TII->reverseBranchCondition(RevPred) || !TII->SubsumesPredicate(Cond, RevPred)) return false; } @@ -795,13 +1103,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, return true; } -/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from -/// the specified block. Record its successors and whether it looks like an -/// if-conversion candidate. +/// Analyze the structure of the sub-CFG starting from the specified block. +/// Record its successors and whether it looks like an if-conversion candidate. void IfConverter::AnalyzeBlock( - MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { + MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { struct BBState { - BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {} + BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {} MachineBasicBlock *MBB; /// This flag is true if MBB's successors have been analyzed. @@ -825,7 +1132,10 @@ void IfConverter::AnalyzeBlock( BBI.BB = BB; BBI.IsBeingAnalyzed = true; - ScanInstructions(BBI); + AnalyzeBranches(BBI); + MachineBasicBlock::iterator Begin = BBI.BB->begin(); + MachineBasicBlock::iterator End = BBI.BB->end(); + ScanInstructions(BBI, Begin, End); // Unanalyzable or ends with fallthrough or unconditional branch, or if is // not considered for ifcvt anymore. @@ -854,8 +1164,8 @@ void IfConverter::AnalyzeBlock( // Push the False and True blocks to the stack. 
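// Editorial sketch (not part of this diff): AnalyzeBlock above walks the
// sub-CFG with an explicit stack of BBState entries instead of recursion, so
// deep if-then chains cannot overflow the call stack. The shape of that loop,
// with the per-block work elided:
static void analyzeBlockShapeSketch(MachineBasicBlock &Entry) {
  // SmallVector<BBState, 16> BBStack;
  // BBStack.push_back(Entry);
  // while (!BBStack.empty()) {
  //   BBState &State = BBStack.back();
  //   if (!State.SuccsAnalyzed) {
  //     // scan *State.MBB, push its TrueBB/FalseBB, and revisit later
  //   } else {
  //     // both successors analyzed: classify the shape, enqueue a token, pop
  //     // BBStack.pop_back();
  //   }
  // }
  (void)Entry;
}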
State.SuccsAnalyzed = true; - BBStack.push_back(BBI.FalseBB); - BBStack.push_back(BBI.TrueBB); + BBStack.push_back(*BBI.FalseBB); + BBStack.push_back(*BBI.TrueBB); continue; } @@ -871,7 +1181,7 @@ void IfConverter::AnalyzeBlock( SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); - bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + bool CanRevCond = !TII->reverseBranchCondition(RevCond); unsigned Dups = 0; unsigned Dups2 = 0; @@ -881,25 +1191,59 @@ void IfConverter::AnalyzeBlock( BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); - if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + - TrueBBI.ExtraCost), TrueBBI.ExtraCost2, - *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + - FalseBBI.ExtraCost),FalseBBI.ExtraCost2, - Prediction) && - FeasibilityAnalysis(TrueBBI, BBI.BrCond) && - FeasibilityAnalysis(FalseBBI, RevCond)) { - // Diamond: - // EBB - // / \_ - // | | - // TBB FBB - // \ / - // TailBB - // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique<IfcvtToken>( - BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2)); - Enqueued = true; + if (CanRevCond) { + BBInfo TrueBBICalc, FalseBBICalc; + auto feasibleDiamond = [&]() { + bool MeetsSize = MeetIfcvtSizeLimit( + *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + + TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, + *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + + FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, + Prediction); + bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true); + bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true); + return MeetsSize && TrueFeasible && FalseFeasible; + }; + + if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc)) { + if (feasibleDiamond()) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(llvm::make_unique<IfcvtToken>( + BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc)) { + if (feasibleDiamond()) { + // ForkedDiamond: + // if TBB and FBB have a common tail that includes their conditional + // branch instructions, then we can If Convert this pattern. + // EBB + // _/ \_ + // | | + // TBB FBB + // / \ / \ + // FalseBB TrueBB FalseBB + // + Tokens.push_back(llvm::make_unique<IfcvtToken>( + BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + } } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && @@ -985,25 +1329,23 @@ void IfConverter::AnalyzeBlock( } } -/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion -/// candidates. +/// Analyze all blocks and find entries for all if-conversion candidates. void IfConverter::AnalyzeBlocks( MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { - for (auto &BB : MF) - AnalyzeBlock(&BB, Tokens); + for (MachineBasicBlock &MBB : MF) + AnalyzeBlock(MBB, Tokens); // Sort to favor more complex ifcvt scheme. 
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); } -/// canFallThroughTo - Returns true either if ToBB is the next block after BB or -/// that all the intervening blocks are empty (given BB can fall through to its -/// next block). -static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator PI = BB->getIterator(); +/// Returns true either if ToMBB is the next block after MBB or that all the +/// intervening blocks are empty (given MBB can fall through to its next block). +static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) { + MachineFunction::iterator PI = MBB.getIterator(); MachineFunction::iterator I = std::next(PI); - MachineFunction::iterator TI = ToBB->getIterator(); - MachineFunction::iterator E = BB->getParent()->end(); + MachineFunction::iterator TI = ToMBB.getIterator(); + MachineFunction::iterator E = MBB.getParent()->end(); while (I != TI) { // Check isSuccessor to avoid case where the next block is empty, but // it's not a successor. @@ -1014,30 +1356,27 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { return true; } -/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed -/// to determine if it can be if-converted. If predecessor is already enqueued, -/// dequeue it! -void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { - for (const auto &Predecessor : BB->predecessors()) { +/// Invalidate predecessor BB info so it would be re-analyzed to determine if it +/// can be if-converted. If predecessor is already enqueued, dequeue it! +void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) { + for (const MachineBasicBlock *Predecessor : MBB.predecessors()) { BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()]; - if (PBBI.IsDone || PBBI.BB == BB) + if (PBBI.IsDone || PBBI.BB == &MBB) continue; PBBI.IsAnalyzed = false; PBBI.IsEnqueued = false; } } -/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB. -/// -static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, +/// Inserts an unconditional branch from \p MBB to \p ToMBB. +static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB, const TargetInstrInfo *TII) { DebugLoc dl; // FIXME: this is nowhere SmallVector<MachineOperand, 0> NoCond; - TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl); + TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl); } -/// RemoveExtraEdges - Remove true / false edges if either / both are no longer -/// successors. +/// Remove true / false edges if either / both are no longer successors. void IfConverter::RemoveExtraEdges(BBInfo &BBI) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; @@ -1046,29 +1385,42 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { } /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all -/// values defined in MI which are not live/used by MI. +/// values defined in MI which are also live/used by MI. static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { + const TargetRegisterInfo *TRI = MI.getParent()->getParent() + ->getSubtarget().getRegisterInfo(); + + // Before stepping forward past MI, remember which regs were live + // before MI. This is needed to set the Undef flag only when reg is + // dead. 
+ SparseSet<unsigned> LiveBeforeMI; + LiveBeforeMI.setUniverse(TRI->getNumRegs()); + for (unsigned Reg : Redefs) + LiveBeforeMI.insert(Reg); + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; Redefs.stepForward(MI, Clobbers); // Now add the implicit uses for each of the clobbered values. - for (auto Reg : Clobbers) { + for (auto Clobber : Clobbers) { // FIXME: Const cast here is nasty, but better than making StepForward // take a mutable instruction instead of const. - MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second); + unsigned Reg = Clobber.first; + MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second); MachineInstr *OpMI = Op.getParent(); MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); if (Op.isRegMask()) { // First handle regmasks. They clobber any entries in the mask which // means that we need a def for those registers. - MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + if (LiveBeforeMI.count(Reg)) + MIB.addReg(Reg, RegState::Implicit); // We also need to add an implicit def of this register for the later // use to read from. // For the register allocator to have allocated a register clobbered // by the call which is used later, it must be the case that // the call doesn't return. - MIB.addReg(Reg.first, RegState::Implicit | RegState::Define); + MIB.addReg(Reg, RegState::Implicit | RegState::Define); continue; } assert(Op.isReg() && "Register operand required"); @@ -1078,13 +1430,23 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { if (Redefs.contains(Op.getReg())) Op.setIsDead(false); } - MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + if (LiveBeforeMI.count(Reg)) + MIB.addReg(Reg, RegState::Implicit); + else { + bool HasLiveSubReg = false; + for (MCSubRegIterator S(Reg, TRI); S.isValid(); ++S) { + if (!LiveBeforeMI.count(*S)) + continue; + HasLiveSubReg = true; + break; + } + if (HasLiveSubReg) + MIB.addReg(Reg, RegState::Implicit); + } } } -/** - * Remove kill flags from operands with a registers in the @p DontKill set. - */ +/// Remove kill flags from operands with a registers in the \p DontKill set. static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { for (MIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->isKill()) @@ -1094,20 +1456,17 @@ static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { } } -/** - * Walks a range of machine instructions and removes kill flags for registers - * in the @p DontKill set. - */ +/// Walks a range of machine instructions and removes kill flags for registers +/// in the \p DontKill set. static void RemoveKills(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, const LivePhysRegs &DontKill, const MCRegisterInfo &MCRI) { - for ( ; I != E; ++I) - RemoveKills(*I, DontKill); + for (MachineInstr &MI : make_range(I, E)) + RemoveKills(MI, DontKill); } -/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. -/// +/// If convert a simple (split, no rejoin) sub-CFG. 
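// Editorial sketch (not part of this diff): LiveBeforeMI above uses
// llvm::SparseSet (llvm/ADT/SparseSet.h) keyed by register number;
// setUniverse() must size the set before the first insert(), and count() is
// then O(1). The same pattern as a standalone helper:
static void rememberLiveSketch(const LivePhysRegs &Redefs, unsigned NumRegs) {
  SparseSet<unsigned> LiveBefore;
  LiveBefore.setUniverse(NumRegs);     // required before insert()
  for (unsigned Reg : Redefs)
    LiveBefore.insert(Reg);
  // Later: LiveBefore.count(Reg) tells whether Reg was live before the
  // instruction, i.e. whether an implicit use needs RegState::Undef or not.
}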
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; @@ -1118,54 +1477,58 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICSimpleFalse) std::swap(CvtBBI, NextBBI); + MachineBasicBlock &CvtMBB = *CvtBBI->BB; + MachineBasicBlock &NextMBB = *NextBBI->BB; if (CvtBBI->IsDone || - (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) { // Something has changed. It's no longer safe to predicate this block. BBI.IsAnalyzed = false; CvtBBI->IsAnalyzed = false; return false; } - if (CvtBBI->BB->hasAddressTaken()) + if (CvtMBB.hasAddressTaken()) // Conservatively abort if-conversion if BB's address is taken. return false; if (Kind == ICSimpleFalse) - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); - // Initialize liveins to the first BB. These are potentiall redefined by - // predicated instructions. - Redefs.init(TRI); - Redefs.addLiveIns(*CvtBBI->BB); - Redefs.addLiveIns(*NextBBI->BB); - - // Compute a set of registers which must not be killed by instructions in - // BB1: This is everything live-in to BB2. - DontKill.init(TRI); - DontKill.addLiveIns(*NextBBI->BB); + Redefs.init(*TRI); + DontKill.init(*TRI); + + if (MRI->tracksLiveness()) { + // Initialize liveins to the first BB. These are potentiall redefined by + // predicated instructions. + Redefs.addLiveIns(CvtMBB); + Redefs.addLiveIns(NextMBB); + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. + DontKill.addLiveIns(NextMBB); + } - if (CvtBBI->BB->pred_size() > 1) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB, true); + BBI.BB->removeSuccessor(&CvtMBB, true); } else { - RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Merge converted block into entry block. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI); } bool IterIfcvt = true; - if (!canFallThroughTo(BBI.BB, NextBBI->BB)) { - InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + if (!canFallThroughTo(*BBI.BB, NextMBB)) { + InsertUncondBranch(*BBI.BB, NextMBB, TII); BBI.HasFallThrough = false; // Now ifcvt'd block will look like this: // BB: @@ -1185,15 +1548,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; - InvalidatePreds(BBI.BB); + InvalidatePreds(*BBI.BB); CvtBBI->IsDone = true; // FIXME: Must maintain LiveIns. return true; } -/// IfConvertTriangle - If convert a triangle sub-CFG. -/// +/// If convert a triangle sub-CFG. 
bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; @@ -1205,29 +1567,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) std::swap(CvtBBI, NextBBI); + MachineBasicBlock &CvtMBB = *CvtBBI->BB; + MachineBasicBlock &NextMBB = *NextBBI->BB; if (CvtBBI->IsDone || - (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) { // Something has changed. It's no longer safe to predicate this block. BBI.IsAnalyzed = false; CvtBBI->IsAnalyzed = false; return false; } - if (CvtBBI->BB->hasAddressTaken()) + if (CvtMBB.hasAddressTaken()) // Conservatively abort if-conversion if BB's address is taken. return false; if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { - if (ReverseBranchCondition(*CvtBBI)) { + if (reverseBranchCondition(*CvtBBI)) { // BB has been changed, modify its predecessors (except for this // one) so they don't get ifcvt'ed based on bad intel. - for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), - E = CvtBBI->BB->pred_end(); PI != E; ++PI) { - MachineBasicBlock *PBB = *PI; + for (MachineBasicBlock *PBB : CvtMBB.predecessors()) { if (PBB == BBI.BB) continue; BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; @@ -1241,9 +1603,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - Redefs.init(TRI); - Redefs.addLiveIns(*CvtBBI->BB); - Redefs.addLiveIns(*NextBBI->BB); + Redefs.init(*TRI); + if (MRI->tracksLiveness()) { + Redefs.addLiveIns(CvtMBB); + Redefs.addLiveIns(NextMBB); + } DontKill.clear(); @@ -1251,29 +1615,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; if (HasEarlyExit) { - // Get probabilities before modifying CvtBBI->BB and BBI.BB. - CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB); - CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB); - BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB); - BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB); + // Get probabilities before modifying CvtMBB and BBI.BB. + CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB); + CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB); + BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } - if (CvtBBI->BB->pred_size() > 1) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB, true); + BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. 
- CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); + PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Now merge the entry of the triangle with the true block. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI, false); } @@ -1281,24 +1645,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (HasEarlyExit) { SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), CvtBBI->BrCond.end()); - if (TII->ReverseBranchCondition(RevCond)) + if (TII->reverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); // Update the edge probability for both CvtBBI->FalseBB and NextBBI. - // NewNext = New_Prob(BBI.BB, NextBBI->BB) = - // Prob(BBI.BB, NextBBI->BB) + - // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB) + // NewNext = New_Prob(BBI.BB, NextMBB) = + // Prob(BBI.BB, NextMBB) + + // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB) // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = - // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB) - auto NewTrueBB = getNextBlock(BBI.BB); + // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB) + auto NewTrueBB = getNextBlock(*BBI.BB); auto NewNext = BBNext + BBCvt * CvtNext; - auto NewTrueBBIter = - std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB); + auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB); if (NewTrueBBIter != BBI.BB->succ_end()) BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); auto NewFalse = BBCvt * CvtFalse; - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); + TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); } @@ -1306,18 +1669,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // predecessors. Otherwise, add an unconditional branch to 'false'. bool FalseBBDead = false; bool IterIfcvt = true; - bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB); if (!isFallThrough) { // Only merge them if the true block does not fallthrough to the false // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough && - !NextBBI->BB->hasAddressTaken()) { + NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough && + !NextMBB.hasAddressTaken()) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { - InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + InsertUncondBranch(*BBI.BB, NextMBB, TII); BBI.HasFallThrough = false; } // Mixed predicated and unpredicated code. This cannot be iteratively @@ -1330,7 +1693,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; - InvalidatePreds(BBI.BB); + InvalidatePreds(*BBI.BB); CvtBBI->IsDone = true; if (FalseBBDead) NextBBI->IsDone = true; @@ -1339,23 +1702,25 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { return true; } -/// IfConvertDiamond - If convert a diamond sub-CFG. 
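// Editorial sketch (not part of this diff): a concrete instance of the edge
// probability update above, with made-up probabilities. Suppose
// Prob(BB,Cvt)=1/2, Prob(Cvt,Next)=1/2, Prob(BB,Next)=1/4 and
// Prob(Cvt,CvtFalse)=1/2. After folding Cvt into BB:
//   NewNext  = 1/4 + 1/2 * 1/2 = 1/2
//   NewFalse = 1/2 * 1/2       = 1/4
// BranchProbability supports this arithmetic directly (up to fixed-point
// rounding):
static void edgeProbSketch() {
  BranchProbability BBNext  = BranchProbability::getBranchProbability(1, 4);
  BranchProbability BBCvt   = BranchProbability::getBranchProbability(1, 2);
  BranchProbability CvtNext = BranchProbability::getBranchProbability(1, 2);
  BranchProbability CvtFalse = BranchProbability::getBranchProbability(1, 2);
  BranchProbability NewNext  = BBNext + BBCvt * CvtNext;  // ~1/2
  BranchProbability NewFalse = BBCvt * CvtFalse;          // ~1/4
  (void)NewNext; (void)NewFalse;
}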
-/// -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2) { - BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; - BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; - MachineBasicBlock *TailBB = TrueBBI.TrueBB; - // True block must fall through or end with an unanalyzable terminator. - if (!TailBB) { - if (blockAlwaysFallThrough(TrueBBI)) - TailBB = FalseBBI.TrueBB; - assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); - } +/// Common code shared between diamond conversions. +/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape. +/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI +/// and FalseBBI +/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI +/// and \p FalseBBI +/// \p RemoveBranch - Remove the common branch of the two blocks before +/// predicating. Only false for unanalyzable fallthrough +/// cases. The caller will replace the branch if necessary. +/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for +/// unanalyzable fallthrough +bool IfConverter::IfConvertDiamondCommon( + BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveBranch, bool MergeAddEdges) { if (TrueBBI.IsDone || FalseBBI.IsDone || - TrueBBI.BB->pred_size() > 1 || - FalseBBI.BB->pred_size() > 1) { + TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) { // Something has changed. It's no longer safe to predicate these blocks. BBI.IsAnalyzed = false; TrueBBI.IsAnalyzed = false; @@ -1373,36 +1738,47 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo *BBI1 = &TrueBBI; BBInfo *BBI2 = &FalseBBI; SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); - if (TII->ReverseBranchCondition(RevCond)) + if (TII->reverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; SmallVector<MachineOperand, 4> *Cond2 = &RevCond; // Figure out the more profitable ordering. bool DoSwap = false; - if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + if (TClobbersPred && !FClobbersPred) DoSwap = true; - else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + else if (!TClobbersPred && !FClobbersPred) { if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) DoSwap = true; - } + } else if (TClobbersPred && FClobbersPred) + llvm_unreachable("Predicate info cannot be clobbered by both sides."); if (DoSwap) { std::swap(BBI1, BBI2); std::swap(Cond1, Cond2); } // Remove the conditional branch from entry to the blocks. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); - - // Initialize liveins to the first BB. These are potentially redefined by - // predicated instructions. - Redefs.init(TRI); - Redefs.addLiveIns(*BBI1->BB); + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + + MachineBasicBlock &MBB1 = *BBI1->BB; + MachineBasicBlock &MBB2 = *BBI2->BB; + + // Initialize the Redefs: + // - BB2 live-in regs need implicit uses before being redefined by BB1 + // instructions. + // - BB1 live-out regs need implicit uses before being redefined by BB2 + // instructions. We start with BB1 live-ins so we have the live-out regs + // after tracking the BB1 instructions. + Redefs.init(*TRI); + if (MRI->tracksLiveness()) { + Redefs.addLiveIns(MBB1); + Redefs.addLiveIns(MBB2); + } // Remove the duplicated instructions at the beginnings of both paths. 
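// Editorial sketch (not part of this diff): the DoSwap logic above decides
// which half of the diamond is predicated first. A block that clobbers the
// predicate must be predicated last, and when neither block clobbers it the
// order is chosen by a size heuristic. Reduced to a predicate:
static bool shouldSwapSketch(bool TClobbersPred, bool FClobbersPred,
                             unsigned TrueSize, unsigned FalseSize) {
  assert(!(TClobbersPred && FClobbersPred) &&
         "Predicate info cannot be clobbered by both sides.");
  if (TClobbersPred)
    return true;                       // put the clobbering block second
  if (!FClobbersPred)
    return TrueSize > FalseSize;       // purely a size heuristic
  return false;                        // only the false block clobbers
}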
// Skip dbg_value instructions - MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr(); - MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(); BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; @@ -1421,52 +1797,60 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Compute a set of registers which must not be killed by instructions in BB1: // This is everything used+live in BB2 after the duplicated instructions. We // can compute this set by simulating liveness backwards from the end of BB2. - DontKill.init(TRI); - for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), - E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { - DontKill.stepBackward(*I); + DontKill.init(*TRI); + if (MRI->tracksLiveness()) { + for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse())) + DontKill.stepBackward(MI); + + for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) { + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy; + Redefs.stepForward(MI, Dummy); + } } + BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1); + MBB2.erase(MBB2.begin(), DI2); - for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; - ++I) { - SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers; - Redefs.stepForward(*I, IgnoredClobbers); - } - BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); - BBI2->BB->erase(BBI2->BB->begin(), DI2); - - // Remove branch from the 'true' block, unless it was not analyzable. - // Non-analyzable branches need to be preserved, since in such cases, - // the CFG structure is not an actual diamond (the join block may not - // be present). - if (BBI1->IsBrAnalyzable) - BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // The branches have been checked to match, so it is safe to remove the branch + // in BB1 and rely on the copy in BB2 +#ifndef NDEBUG + // Unanalyzable branches must match exactly. Check that now. + if (!BBI1->IsBrAnalyzable) + verifySameBranchInstructions(&MBB1, &MBB2); +#endif + BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB); // Remove duplicated instructions. - DI1 = BBI1->BB->end(); + DI1 = MBB1.end(); for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. - assert (DI1 != BBI1->BB->begin()); + assert(DI1 != MBB1.begin()); --DI1; // skip dbg_value instructions if (!DI1->isDebugValue()) ++i; } - BBI1->BB->erase(DI1, BBI1->BB->end()); + MBB1.erase(DI1, MBB1.end()); // Kill flags in the true block for registers living into the false block // must be removed. - RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI); - // Remove 'false' block branch (unless it was not analyzable), and find - // the last instruction to predicate. - if (BBI2->IsBrAnalyzable) - BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); + // The branches have been checked to match. Skip over the branch in the false + // block so that we don't try to predicate it. 
+ if (RemoveBranch) + BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB); + else { + do { + assert(DI2 != MBB2.begin()); + DI2--; + } while (DI2->isBranch() || DI2->isDebugValue()); + DI2++; + } while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. - assert (DI2 != BBI2->BB->begin()); + assert(DI2 != MBB2.begin()); --DI2; // skip dbg_value instructions if (!DI2->isDebugValue()) @@ -1483,13 +1867,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // addne r0, r1, #1 SmallSet<unsigned, 4> RedefsByFalse; SmallSet<unsigned, 4> ExtUses; - if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { - for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { - if (FI->isDebugValue()) + if (TII->isProfitableToUnpredicate(MBB1, MBB2)) { + for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) { + if (FI.isDebugValue()) continue; SmallVector<unsigned, 4> Defs; - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = FI->getOperand(i); + for (const MachineOperand &MO : FI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1506,8 +1889,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; + for (unsigned Reg : Defs) { if (!ExtUses.count(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -1518,17 +1900,17 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } // Predicate the 'true' block. - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); + PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse); // After predicating BBI1, if there is a predicated terminator in BBI1 and // a non-predicated in BBI2, then we don't want to predicate the one from // BBI2. The reason is that if we merged these blocks, we would end up with // two predicated terminators in the same block. - if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) { - MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator(); - MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator(); - if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) && - BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T)) + if (!MBB2.empty() && (DI2 == MBB2.end())) { + MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator(); + MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator(); + if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) && + BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T)) --DI2; } @@ -1536,8 +1918,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1, TailBB == nullptr); - MergeBlocks(BBI, *BBI2, TailBB == nullptr); + MergeBlocks(BBI, *BBI1, MergeAddEdges); + MergeBlocks(BBI, *BBI2, MergeAddEdges); + return true; +} + +/// If convert an almost-diamond sub-CFG where the true +/// and false blocks share a common tail. +bool IfConverter::IfConvertForkedDiamond( + BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + + // Save the debug location for later. 
+ DebugLoc dl; + MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator(); + if (TIE != TrueBBI.BB->end()) + dl = TIE->getDebugLoc(); + // Removing branches from both blocks is safe, because we have already + // determined that both blocks have the same branch instructions. The branch + // will be added back at the end, unpredicated. + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TClobbersPred, FClobbersPred, + /* RemoveBranch */ true, /* MergeAddEdges */ true)) + return false; + + // Add back the branch. + // Debug location saved above when removing the branch from BBI2 + TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, + TrueBBI.BrCond, dl); + + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(*BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +/// If convert a diamond sub-CFG. +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + + // True block must fall through or end with an unanalyzable terminator. + if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TClobbersPred, FClobbersPred, + /* RemoveBranch */ TrueBBI.IsBrAnalyzable, + /* MergeAddEdges */ TailBB == nullptr)) + return false; // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1560,7 +2006,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, CanMergeTail = false; else if (NumPreds == 1 && CanMergeTail) { MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); - if (*PI != BBI1->BB && *PI != BBI2->BB) + if (*PI != TrueBBI.BB && *PI != FalseBBI.BB) CanMergeTail = false; } if (CanMergeTail) { @@ -1568,7 +2014,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, TailBBI.IsDone = true; } else { BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); - InsertUncondBranch(BBI.BB, TailBB, TII); + InsertUncondBranch(*BBI.BB, *TailBB, TII); BBI.HasFallThrough = false; } } @@ -1576,13 +2022,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // RemoveExtraEdges won't work if the block has an unanalyzable branch, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. - BBI.BB->removeSuccessor(BBI1->BB); - BBI.BB->removeSuccessor(BBI2->BB, true); + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); RemoveExtraEdges(BBI); // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; - InvalidatePreds(BBI.BB); + InvalidatePreds(*BBI.BB); // FIXME: Must maintain LiveIns. 
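// Editorial note (not part of this diff): the two drivers above differ only
// in how they parameterize IfConvertDiamondCommon:
//   IfConvertForkedDiamond: RemoveBranch = true (the shared conditional
//                           branch is re-inserted afterwards from
//                           TrueBBI.BrCond), MergeAddEdges = true.
//   IfConvertDiamond:       RemoveBranch = TrueBBI.IsBrAnalyzable,
//                           MergeAddEdges = (TailBB == nullptr).
// So a classic diamond with an unanalyzable terminator keeps that terminator
// in place, while a forked diamond always rebuilds its branch explicitly.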
return true; @@ -1594,8 +2040,7 @@ static bool MaySpeculate(const MachineInstr &MI, if (!MI.isSafeToMove(nullptr, SawStore)) return false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1608,15 +2053,15 @@ static bool MaySpeculate(const MachineInstr &MI, return true; } -/// PredicateBlock - Predicate instructions from the start of the block to the -/// specified end with the specified condition. +/// Predicate instructions from the start of the block to the specified end with +/// the specified condition. void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != nullptr; - for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) { + for (MachineInstr &I : make_range(BBI.BB->begin(), E)) { if (I.isDebugValue() || TII->isPredicated(I)) continue; // It may be possible not to predicate an instruction if it's the 'true' @@ -1651,14 +2096,15 @@ void IfConverter::PredicateBlock(BBInfo &BBI, ++NumUnpred; } -/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to -/// the destination block. Skip end of block branches if IgnoreBr is true. +/// Copy and predicate instructions from source BB to the destination block. +/// Skip end of block branches if IgnoreBr is true. void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); - for (auto &I : *FromBBI.BB) { + MachineBasicBlock &FromMBB = *FromBBI.BB; + for (MachineInstr &I : FromMBB) { // Do not copy the end of the block branches. if (IgnoreBr && I.isBranch()) break; @@ -1691,13 +2137,12 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } if (!IgnoreBr) { - std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(), + FromMBB.succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromMBB); MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; - for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = Succs[i]; + for (MachineBasicBlock *Succ : Succs) { // Fallthrough edge can't be transferred. if (Succ == FallThrough) continue; @@ -1714,25 +2159,25 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ++NumDupBBs; } -/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. -/// This will leave FromBB as an empty block, so remove all of its -/// successor edges except for the fall-through edge. If AddEdges is true, -/// i.e., when FromBBI's branch is being moved, add those successor edges to -/// ToBBI. +/// Move all instructions from FromBB to the end of ToBB. This will leave +/// FromBB as an empty block, so remove all of its successor edges except for +/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is +/// being moved, add those successor edges to ToBBI. 
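A worked example of the probability bookkeeping performed by MergeBlocks below (illustrative only; the block names, the numbers, and the helper function are made up, not part of the patch). It shows how the probability transferred from FromMBB to one of its successors is scaled by the probability of reaching FromMBB from ToBBI.BB, exactly as the NewProb *= To2FromProb step in the function body does:

  #include "llvm/Support/BranchProbability.h"
  using llvm::BranchProbability;

  static BranchProbability mergedEdgeProbExample() {
    // Assume P(To -> From) = 1/2 and P(From -> Succ) = 3/4.
    BranchProbability To2From = BranchProbability::getBranchProbability(1, 2);
    BranchProbability From2Succ = BranchProbability::getBranchProbability(3, 4);
    // The new To -> Succ edge receives the portion of From -> Succ that is
    // reachable through To, i.e. 1/2 * 3/4 = 3/8.  If To -> Succ already
    // exists, this amount is added onto its current probability instead.
    BranchProbability NewProb = From2Succ;
    NewProb *= To2From;
    return NewProb; // 3/8
  }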
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { - assert(!FromBBI.BB->hasAddressTaken() && + MachineBasicBlock &FromMBB = *FromBBI.BB; + assert(!FromMBB.hasAddressTaken() && "Removing a BB whose address is taken!"); - // In case FromBBI.BB contains terminators (e.g. return instruction), + // In case FromMBB contains terminators (e.g. return instruction), // first move the non-terminator instructions, then the terminators. - MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator(); + MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator(); MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator(); - ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI); + ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI); // If FromBB has non-predicated terminator we should copy it at the end. - if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI)) + if (FromTI != FromMBB.end() && !TII->isPredicated(*FromTI)) ToTI = ToBBI.BB->end(); - ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end()); + ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end()); // Force normalizing the successors' probabilities of ToBBI.BB to convert all // unknown probabilities into known ones. @@ -1740,25 +2185,23 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { // eliminate all unknown probabilities in MBB. ToBBI.BB->normalizeSuccProbs(); - SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(), + FromMBB.succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromMBB); MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; - // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when - // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB. + // The edge probability from ToBBI.BB to FromMBB, which is only needed when + // AddEdges is true and FromMBB is a successor of ToBBI.BB. auto To2FromProb = BranchProbability::getZero(); - if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) { - To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB); - // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the + if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) { + To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB); + // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the // edge probability being merged to other edges when this edge is removed // later. - ToBBI.BB->setSuccProbability( - std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB), - BranchProbability::getZero()); + ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB), + BranchProbability::getZero()); } - for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) { - MachineBasicBlock *Succ = FromSuccs[i]; + for (MachineBasicBlock *Succ : FromSuccs) { // Fallthrough edge can't be transferred. if (Succ == FallThrough) continue; @@ -1766,26 +2209,26 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { auto NewProb = BranchProbability::getZero(); if (AddEdges) { // Calculate the edge probability for the edge from ToBBI.BB to Succ, - // which is a portion of the edge probability from FromBBI.BB to Succ. The - // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if + // which is a portion of the edge probability from FromMBB to Succ. 
The + // portion ratio is the edge probability from ToBBI.BB to FromMBB (if // FromBBI is a successor of ToBBI.BB. See comment below for excepion). - NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ); + NewProb = MBPI->getEdgeProbability(&FromMBB, Succ); - // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This - // only happens when if-converting a diamond CFG and FromBBI.BB is the - // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we - // could just use the probabilities on FromBBI.BB's out-edges when adding + // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This + // only happens when if-converting a diamond CFG and FromMBB is the + // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we + // could just use the probabilities on FromMBB's out-edges when adding // new successors. if (!To2FromProb.isZero()) NewProb *= To2FromProb; } - FromBBI.BB->removeSuccessor(Succ); + FromMBB.removeSuccessor(Succ); if (AddEdges) { // If the edge from ToBBI.BB to Succ already exists, update the // probability of this edge by adding NewProb to it. An example is shown - // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we + // below, in which A is ToBBI.BB and B is FromMBB. In this case we // don't have to set C as A's successor as it already is. We only need to // update the edge probability on A->C. Note that B will not be // immediately removed from A's successors. It is possible that B->D is @@ -1807,7 +2250,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { // if (ToBBI.BB->isSuccessor(Succ)) ToBBI.BB->setSuccProbability( - std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ), + find(ToBBI.BB->successors(), Succ), MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); else ToBBI.BB->addSuccessor(Succ, NewProb); @@ -1815,8 +2258,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { } // Now FromBBI always falls through to the next block! - if (NBB && !FromBBI.BB->isSuccessor(NBB)) - FromBBI.BB->addSuccessor(NBB); + if (NBB && !FromMBB.isSuccessor(NBB)) + FromMBB.addSuccessor(NBB); // Normalize the probabilities of ToBBI.BB's successors with all adjustment // we've done above. @@ -1839,6 +2282,6 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { } FunctionPass * -llvm::createIfConverter(std::function<bool(const Function &)> Ftor) { +llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) { return new IfConverter(std::move(Ftor)); } diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 31d6bd0..9588dfb 100644 --- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -51,6 +51,12 @@ static cl::opt<int> PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096)); +static cl::opt<unsigned> MaxInstsToConsider( + "imp-null-max-insts-to-consider", + cl::desc("The max number of instructions to consider hoisting loads over " + "(the algorithm is quadratic over this number)"), + cl::init(8)); + #define DEBUG_TYPE "implicit-null-checks" STATISTIC(NumImplicitNullChecks, @@ -59,6 +65,44 @@ STATISTIC(NumImplicitNullChecks, namespace { class ImplicitNullChecks : public MachineFunctionPass { + /// Return true if \c computeDependence can process \p MI. + static bool canHandle(const MachineInstr *MI); + + /// Helper function for \c computeDependence. 
Return true if \p A
+  /// and \p B do not have any dependences between them, and can be
+  /// re-ordered without changing program semantics.
+  bool canReorder(const MachineInstr *A, const MachineInstr *B);
+
+  /// A data type for representing the result computed by \c
+  /// computeDependence.  States whether it is okay to reorder the
+  /// instruction passed to \c computeDependence with at most one
+  /// dependency.
+  struct DependenceResult {
+    /// Can we actually re-order \p MI with \p Insts (see \c
+    /// computeDependence).
+    bool CanReorder;
+
+    /// If non-None, then an instruction in \p Insts that also must be
+    /// hoisted.
+    Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
+
+    /*implicit*/ DependenceResult(
+        bool CanReorder,
+        Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
+        : CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
+      assert((!PotentialDependence || CanReorder) &&
+             "!CanReorder && PotentialDependence.hasValue() not allowed!");
+    }
+  };
+
+  /// Compute a result for the following question: can \p MI be
+  /// re-ordered from after \p Insts to before it.
+  ///
+  /// \c canHandle should return true for all instructions in \p
+  /// Insts.
+  DependenceResult computeDependence(const MachineInstr *MI,
+                                     ArrayRef<MachineInstr *> Insts);
+
   /// Represents one null check that can be made implicit.
   class NullCheck {
     // The memory operation the null check can be folded into.
@@ -114,6 +158,19 @@ class ImplicitNullChecks : public MachineFunctionPass {
                       MachineBasicBlock *HandlerMBB);
   void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
 
+  /// Is \p MI a memory operation that can be used to implicitly null check the
+  /// value in \p PointerReg? \p PrevInsts is the set of instructions seen since
+  /// the explicit null check on \p PointerReg.
+  bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+                          ArrayRef<MachineInstr *> PrevInsts);
+
+  /// Return true if \p FaultingMI can be hoisted from after the
+  /// instructions in \p InstsSeenSoFar to before them.  Set \p Dependence to a
+  /// non-null value if we also need to (and legally can) hoist a dependency.
+  bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
+                        ArrayRef<MachineInstr *> InstsSeenSoFar,
+                        MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+
 public:
   static char ID;
 
@@ -129,160 +186,70 @@ public:
 
   MachineFunctionProperties getRequiredProperties() const override {
     return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::AllVRegsAllocated);
+        MachineFunctionProperties::Property::NoVRegs);
   }
 };
 
-/// \brief Detect re-ordering hazards and dependencies.
-///
-/// This class keeps track of defs and uses, and can be queried if a given
-/// machine instruction can be re-ordered from after the machine instructions
-/// seen so far to before them.
-class HazardDetector {
-  static MachineInstr *getUnknownMI() {
-    return DenseMapInfo<MachineInstr *>::getTombstoneKey();
-  }
-
-  // Maps physical registers to the instruction defining them.  If there has
-  // been more than one def of an specific register, that register is mapped to
-  // getUnknownMI().
-  DenseMap<unsigned, MachineInstr *> RegDefs;
-  DenseSet<unsigned> RegUses;
-  const TargetRegisterInfo &TRI;
-  bool hasSeenClobber;
-  AliasAnalysis &AA;
-
-public:
-  explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
-      : TRI(TRI), hasSeenClobber(false), AA(AA) {}
+}
 
-  /// \brief Make a note of \p MI for later queries to isSafeToHoist.
- /// - /// May clobber this HazardDetector instance. \see isClobbered. - void rememberInstruction(MachineInstr *MI); +bool ImplicitNullChecks::canHandle(const MachineInstr *MI) { + if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects()) + return false; + auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); }; + (void)IsRegMask; - /// \brief Return true if it is safe to hoist \p MI from after all the - /// instructions seen so far (via rememberInstruction) to before it. If \p MI - /// has one and only one transitive dependency, set \p Dependency to that - /// instruction. If there are more dependencies, return false. - bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency); + assert(!llvm::any_of(MI->operands(), IsRegMask) && + "Calls were filtered out above!"); - /// \brief Return true if this instance of HazardDetector has been clobbered - /// (i.e. has no more useful information). - /// - /// A HazardDetecter is clobbered when it sees a construct it cannot - /// understand, and it would have to return a conservative answer for all - /// future queries. Having a separate clobbered state lets the client code - /// bail early, without making queries about all of the future instructions - /// (which would have returned the most conservative answer anyway). - /// - /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector - /// is an error. - bool isClobbered() { return hasSeenClobber; } -}; + auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); }; + return llvm::all_of(MI->memoperands(), IsUnordered); } +ImplicitNullChecks::DependenceResult +ImplicitNullChecks::computeDependence(const MachineInstr *MI, + ArrayRef<MachineInstr *> Block) { + assert(llvm::all_of(Block, canHandle) && "Check this first!"); + assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!"); -void HazardDetector::rememberInstruction(MachineInstr *MI) { - assert(!isClobbered() && - "Don't add instructions to a clobbered hazard detector"); + Optional<ArrayRef<MachineInstr *>::iterator> Dep; - if (MI->mayStore() || MI->hasUnmodeledSideEffects()) { - hasSeenClobber = true; - return; - } + for (auto I = Block.begin(), E = Block.end(); I != E; ++I) { + if (canReorder(*I, MI)) + continue; - for (auto *MMO : MI->memoperands()) { - // Right now we don't want to worry about LLVM's memory model. - if (!MMO->isUnordered()) { - hasSeenClobber = true; - return; + if (Dep == None) { + // Found one possible dependency, keep track of it. + Dep = I; + } else { + // We found two dependencies, so bail out. + return {false, None}; } } - for (auto &MO : MI->operands()) { - if (!MO.isReg() || !MO.getReg()) - continue; - - if (MO.isDef()) { - auto It = RegDefs.find(MO.getReg()); - if (It == RegDefs.end()) - RegDefs.insert({MO.getReg(), MI}); - else { - assert(It->second && "Found null MI?"); - It->second = getUnknownMI(); - } - } else - RegUses.insert(MO.getReg()); - } + return {true, Dep}; } -bool HazardDetector::isSafeToHoist(MachineInstr *MI, - MachineInstr *&Dependency) { - assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); - Dependency = nullptr; +bool ImplicitNullChecks::canReorder(const MachineInstr *A, + const MachineInstr *B) { + assert(canHandle(A) && canHandle(B) && "Precondition!"); - // Right now we don't want to worry about LLVM's memory model. This can be - // made more precise later. 
- for (auto *MMO : MI->memoperands()) - if (!MMO->isUnordered()) - return false; + // canHandle makes sure that we _can_ correctly analyze the dependencies + // between A and B here -- for instance, we should not be dealing with heap + // load-store dependencies here. - for (auto &MO : MI->operands()) { - if (MO.isReg() && MO.getReg()) { - for (auto &RegDef : RegDefs) { - unsigned Reg = RegDef.first; - MachineInstr *MI = RegDef.second; - if (!TRI.regsOverlap(Reg, MO.getReg())) - continue; + for (auto MOA : A->operands()) { + if (!(MOA.isReg() && MOA.getReg())) + continue; - // We found a write-after-write or read-after-write, see if the - // instruction causing this dependency can be hoisted too. - - if (MI == getUnknownMI()) - // We don't have precise dependency information. - return false; - - if (Dependency) { - if (Dependency == MI) - continue; - // We already have one dependency, and we can track only one. - return false; - } - - // Now check if MI is actually a dependency that can be hoisted. - - // We don't want to track transitive dependencies. We already know that - // MI is the only instruction that defines Reg, but we need to be sure - // that it does not use any registers that have been defined (trivially - // checked below by ensuring that there are no register uses), and that - // it is the only def for every register it defines (otherwise we could - // violate a write after write hazard). - auto IsMIOperandSafe = [&](MachineOperand &MO) { - if (!MO.isReg() || !MO.getReg()) - return true; - if (MO.isUse()) - return false; - assert((!MO.isDef() || RegDefs.count(MO.getReg())) && - "All defs must be tracked in RegDefs by now!"); - return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI; - }; - - if (!all_of(MI->operands(), IsMIOperandSafe)) - return false; - - // Now check for speculation safety: - bool SawStore = true; - if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad()) - return false; - - Dependency = MI; - } + unsigned RegA = MOA.getReg(); + for (auto MOB : B->operands()) { + if (!(MOB.isReg() && MOB.getReg())) + continue; - if (MO.isDef()) - for (unsigned Reg : RegUses) - if (TRI.regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-read + unsigned RegB = MOB.getReg(); + + if (TRI->regsOverlap(RegA, RegB)) + return false; } } @@ -316,6 +283,96 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI, return false; } +bool ImplicitNullChecks::isSuitableMemoryOp( + MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { + int64_t Offset; + unsigned BaseReg; + + if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) || + BaseReg != PointerReg) + return false; + + // We want the load to be issued at a sane offset from PointerReg, so that + // if PointerReg is null then the load reliably page faults. + if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize)) + return false; + + // Finally, we need to make sure that the load instruction actually is + // loading from PointerReg, and there isn't some re-definition of PointerReg + // between the compare and the load. 
+  for (auto *PrevMI : PrevInsts)
+    for (auto &PrevMO : PrevMI->operands())
+      if (PrevMO.isReg() && PrevMO.getReg() &&
+          TRI->regsOverlap(PrevMO.getReg(), PointerReg))
+        return false;
+
+  return true;
+}
+
+bool ImplicitNullChecks::canHoistLoadInst(
+    MachineInstr *FaultingMI, unsigned PointerReg,
+    ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
+    MachineInstr *&Dependence) {
+  auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
+  if (!DepResult.CanReorder)
+    return false;
+
+  if (!DepResult.PotentialDependence) {
+    Dependence = nullptr;
+    return true;
+  }
+
+  auto DependenceItr = *DepResult.PotentialDependence;
+  auto *DependenceMI = *DependenceItr;
+
+  // We don't want to reason about speculating loads.  Note -- at this point
+  // we should have already filtered out all of the other non-speculatable
+  // things, like calls and stores.
+  assert(canHandle(DependenceMI) && "Should never have reached here!");
+  if (DependenceMI->mayLoad())
+    return false;
+
+  for (auto &DependenceMO : DependenceMI->operands()) {
+    if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+      continue;
+
+    // Make sure that we won't clobber any live ins to the sibling block by
+    // hoisting Dependency.  For instance, we can't hoist INST to before the
+    // null check (even if it is safe and does not violate any dependencies in
+    // the non_null_block) if %rdx is live in to _null_block.
+    //
+    //    test %rcx, %rcx
+    //    je _null_block
+    //  _non_null_block:
+    //    %rdx<def> = INST
+    //    ...
+    //
+    // This restriction does not apply to the faulting load inst because in
+    // case the pointer loaded from is in the null page, the load will not
+    // semantically execute, and will not affect machine state.  That is, if
+    // the load was loading into %rax and it faults, the value of %rax should
+    // stay the same as it would have been had the load not executed and we'd
+    // have branched to NullSucc directly.
+    if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+      return false;
+
+    // The Dependency can't be re-defining the base register -- then we won't
+    // get the memory operation on the address we want.  This is already
+    // checked in \c isSuitableMemoryOp.
+    assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+           "Should have been checked before!");
+  }
+
+  auto DepDepResult =
+      computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
+
+  if (!DepDepResult.CanReorder || DepDepResult.PotentialDependence)
+    return false;
+
+  Dependence = DependenceMI;
+  return true;
+}
+
 /// Analyze MBB to check if its terminating branch can be turned into an
 /// implicit null check.  If yes, append a description of the said null check to
 /// NullCheckList and return true, else return false.
@@ -415,63 +472,24 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
   // ptr could be some non-null invalid reference that never gets loaded from
   // because some_cond is always true.
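A standalone sketch of the hoisting rule implemented by computeDependence() and canHoistLoadInst() above, which analyzeBlockForNullChecks below relies on (types and names here are simplified assumptions, not the pass itself): the faulting load may move above the instructions seen so far only if it conflicts with at most one of them, and that single dependence must itself conflict with nothing that precedes it. The real pass additionally rejects loads and live-in clobbers, as the comments above describe.

  #include <vector>

  // Conflict(A, B) returns true when the two instructions cannot be reordered.
  template <typename Inst, typename ConflictFn>
  static bool canHoistOver(const Inst &Candidate, const std::vector<Inst> &Seen,
                           ConflictFn Conflict) {
    int Dep = -1;
    for (int I = 0, E = (int)Seen.size(); I != E; ++I) {
      if (!Conflict(Candidate, Seen[I]))
        continue;                  // freely reorderable, keep scanning
      if (Dep != -1)
        return false;              // a second dependence: give up
      Dep = I;                     // remember the single allowed dependence
    }
    if (Dep == -1)
      return true;                 // nothing in the way
    for (int I = 0; I < Dep; ++I)  // the dependence must be hoistable too
      if (Conflict(Seen[Dep], Seen[I]))
        return false;
    return true;
  }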
- unsigned PointerReg = MBP.LHS.getReg(); - - HazardDetector HD(*TRI, *AA); - - for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; - ++MII) { - MachineInstr &MI = *MII; - unsigned BaseReg; - int64_t Offset; - MachineInstr *Dependency = nullptr; - if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) - if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MI.getDesc().getNumDefs() <= 1 && - HD.isSafeToHoist(&MI, Dependency)) { - - auto DependencyOperandIsOk = [&](MachineOperand &MO) { - assert(!(MO.isReg() && MO.isUse()) && - "No transitive dependendencies please!"); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - return true; - - // Make sure that we won't clobber any live ins to the sibling block - // by hoisting Dependency. For instance, we can't hoist INST to - // before the null check (even if it safe, and does not violate any - // dependencies in the non_null_block) if %rdx is live in to - // _null_block. - // - // test %rcx, %rcx - // je _null_block - // _non_null_block: - // %rdx<def> = INST - // ... - if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg())) - return false; - - // Make sure Dependency isn't re-defining the base register. Then we - // won't get the memory operation on the address we want. - if (TRI->regsOverlap(MO.getReg(), BaseReg)) - return false; - - return true; - }; - - bool DependencyOperandsAreOk = - !Dependency || - all_of(Dependency->operands(), DependencyOperandIsOk); - - if (DependencyOperandsAreOk) { - NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc, - NullSucc, Dependency); - return true; - } - } + const unsigned PointerReg = MBP.LHS.getReg(); - HD.rememberInstruction(&MI); - if (HD.isClobbered()) + SmallVector<MachineInstr *, 8> InstsSeenSoFar; + + for (auto &MI : *NotNullSucc) { + if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider) return false; + + MachineInstr *Dependence; + if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) && + canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, + Dependence)) { + NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc, Dependence); + return true; + } + + InstsSeenSoFar.push_back(&MI); } return false; @@ -518,7 +536,7 @@ void ImplicitNullChecks::rewriteNullChecks( for (auto &NC : NullCheckList) { // Remove the conditional branch dependent on the null check. - unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock()); + unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock()); (void)BranchesRemoved; assert(BranchesRemoved > 0 && "expected at least one branch!"); @@ -560,13 +578,14 @@ void ImplicitNullChecks::rewriteNullChecks( NC.getCheckOperation()->eraseFromParent(); // Insert an *unconditional* branch to not-null successor. 
- TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, + TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, /*Cond=*/None, DL); NumImplicitNullChecks++; } } + char ImplicitNullChecks::ID = 0; char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks", diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 197db77..3d81184 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -114,7 +114,7 @@ public: AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), - MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), @@ -172,7 +172,7 @@ public: AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), - MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), @@ -185,10 +185,7 @@ private: bool isSnippet(const LiveInterval &SnipLI); void collectRegsToSpill(); - bool isRegToSpill(unsigned Reg) { - return std::find(RegsToSpill.begin(), - RegsToSpill.end(), Reg) != RegsToSpill.end(); - } + bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); } bool isSibling(unsigned Reg); bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); @@ -380,7 +377,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def); MachineBasicBlock::iterator MII; if (SrcVNI->isPHIDef()) - MII = MBB->SkipPHIsAndLabels(MBB->begin()); + MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); else { MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def); assert(DefMI && "Defining instruction disappeared"); @@ -553,12 +550,18 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { return true; } - // Alocate a new register for the remat. + // Allocate a new register for the remat. unsigned NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI); + + // We take the DebugLoc from MI, since OrigMI may be attributed to a + // different source location. + auto *NewMI = LIS.getInstructionFromIndex(DefIdx); + NewMI->setDebugLoc(MI.getDebugLoc()); + (void)DefIdx; DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -736,9 +739,12 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; - bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT || - MI->getOpcode() == TargetOpcode::PATCHPOINT || - MI->getOpcode() == TargetOpcode::STACKMAP); + // Spill subregs if the target allows it. + // We always want to spill subregs for stackmap/patchpoint pseudos. 
+ bool SpillSubRegs = TII.isSubregFoldable() || + MI->getOpcode() == TargetOpcode::STATEPOINT || + MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP; // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. @@ -751,7 +757,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, ImpReg = MO.getReg(); continue; } - // FIXME: Teach targets to deal with subregs. + if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. @@ -762,6 +768,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldOps.push_back(Idx); } + // If we only have implicit uses, we won't be able to fold that. + // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try! + if (FoldOps.empty()) + return false; + MachineInstrSpan MIS(MI); MachineInstr *FoldMI = @@ -1113,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. for (const auto CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); + MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1121,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } for (const auto SpillToRm : SpillsToRm) @@ -1198,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); + Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; const std::vector<MachineDomTreeNode *> &Children = Node->getChildren(); diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 3f11119..ec35b3f 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -29,6 +29,9 @@ // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2 // intrinsic in ARM backend. // +// In X86, this can be further optimized into a set of target +// specific loads followed by an optimized sequence of shuffles. +// // E.g. An interleaved store (Factor = 3): // %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> @@ -37,6 +40,8 @@ // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3 // intrinsic in ARM backend. // +// Similarly, a set of interleaved stores can be transformed into an optimized +// sequence of shuffles followed by a set of target specific stores for X86. 
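To make the shuffle-mask patterns above concrete, here is a small standalone checker (illustrative only; the function name and container type are made up) for the strict RE-interleave form that the pass accepted before this change, i.e. <0, LaneLen, ..., 1, LaneLen+1, ...>. The changes below generalize this to tolerate undef lanes and non-zero start indices.

  #include <vector>

  // Returns true if Mask is exactly the canonical RE-interleave pattern for
  // the given Factor, e.g. Factor = 2, LaneLen = 4 -> <0, 4, 1, 5, 2, 6, 3, 7>.
  static bool isCanonicalReInterleave(const std::vector<int> &Mask,
                                      unsigned Factor) {
    if (Factor < 2 || Mask.empty() || Mask.size() % Factor != 0)
      return false;
    unsigned LaneLen = Mask.size() / Factor;
    for (unsigned I = 0, E = Mask.size(); I != E; ++I)
      if (Mask[I] != int((I % Factor) * LaneLen + I / Factor))
        return false;
    return true;
  }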
//===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" @@ -57,8 +62,6 @@ static cl::opt<bool> LowerInterleavedAccesses( cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden); -static unsigned MaxFactor; // The maximum supported interleave factor. - namespace { class InterleavedAccess : public FunctionPass { @@ -70,7 +73,7 @@ public: initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { return "Interleaved Access Pass"; } + StringRef getPassName() const override { return "Interleaved Access Pass"; } bool runOnFunction(Function &F) override; @@ -84,6 +87,9 @@ private: const TargetMachine *TM; const TargetLowering *TLI; + /// The maximum supported interleave factor. + unsigned MaxFactor; + /// \brief Transform an interleaved load into target specific intrinsics. bool lowerInterleavedLoad(LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts); @@ -144,7 +150,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, /// <0, 2, 4, 6> (mask of index 0 to extract even elements) /// <1, 3, 5, 7> (mask of index 1 to extract odd elements) static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, - unsigned &Index) { + unsigned &Index, unsigned MaxFactor) { if (Mask.size() < 2) return false; @@ -156,13 +162,19 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, return false; } -/// \brief Check if the mask is RE-interleave mask for an interleaved store. -/// -/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...> +/// \brief Check if the mask can be used in an interleaved store. +// +/// It checks for a more general pattern than the RE-interleave mask. +/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...> +/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35> +/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> +/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5> /// -/// E.g. The RE-interleave mask (Factor = 2) could be: -/// <0, 4, 1, 5, 2, 6, 3, 7> -static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) { +/// The particular case of an RE-interleave mask is: +/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> +/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> +static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, + unsigned MaxFactor, unsigned OpNumElts) { unsigned NumElts = Mask.size(); if (NumElts < 4) return false; @@ -172,21 +184,75 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) { if (NumElts % Factor) continue; - unsigned NumSubElts = NumElts / Factor; - if (!isPowerOf2_32(NumSubElts)) + unsigned LaneLen = NumElts / Factor; + if (!isPowerOf2_32(LaneLen)) continue; - // Check whether each element matchs the RE-interleaved rule. Ignore undef - // elements. - unsigned i = 0; - for (; i < NumElts; i++) - if (Mask[i] >= 0 && - static_cast<unsigned>(Mask[i]) != - (i % Factor) * NumSubElts + i / Factor) + // Check whether each element matches the general interleaved rule. + // Ignore undef elements, as long as the defined elements match the rule. + // Outer loop processes all factors (x, y, z in the above example) + unsigned I = 0, J; + for (; I < Factor; I++) { + unsigned SavedLaneValue; + unsigned SavedNoUndefs = 0; + + // Inner loop processes consecutive accesses (x, x+1... 
in the example) + for (J = 0; J < LaneLen - 1; J++) { + // Lane computes x's position in the Mask + unsigned Lane = J * Factor + I; + unsigned NextLane = Lane + Factor; + int LaneValue = Mask[Lane]; + int NextLaneValue = Mask[NextLane]; + + // If both are defined, values must be sequential + if (LaneValue >= 0 && NextLaneValue >= 0 && + LaneValue + 1 != NextLaneValue) + break; + + // If the next value is undef, save the current one as reference + if (LaneValue >= 0 && NextLaneValue < 0) { + SavedLaneValue = LaneValue; + SavedNoUndefs = 1; + } + + // Undefs are allowed, but defined elements must still be consecutive: + // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, .... + // Verify this by storing the last non-undef followed by an undef + // Check that following non-undef masks are incremented with the + // corresponding distance. + if (SavedNoUndefs > 0 && LaneValue < 0) { + SavedNoUndefs++; + if (NextLaneValue >= 0 && + SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue) + break; + } + } + + if (J < LaneLen - 1) break; - // Find a RE-interleaved mask of current factor. - if (i == NumElts) + int StartMask = 0; + if (Mask[I] >= 0) { + // Check that the start of the I range (J=0) is greater than 0 + StartMask = Mask[I]; + } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) { + // StartMask defined by the last value in lane + StartMask = Mask[(LaneLen - 1) * Factor + I] - J; + } else if (SavedNoUndefs > 0) { + // StartMask defined by some non-zero value in the j loop + StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs); + } + // else StartMask remains set to 0, i.e. all elements are undefs + + if (StartMask < 0) + break; + // We must stay within the vectors; This case can happen with undefs. + if (StartMask + LaneLen > OpNumElts*2) + break; + } + + // Found an interleaved mask of current factor. + if (I == Factor) return true; } @@ -224,7 +290,8 @@ bool InterleavedAccess::lowerInterleavedLoad( unsigned Factor, Index; // Check if the first shufflevector is DE-interleave shuffle. - if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index)) + if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, + MaxFactor)) return false; // Holds the corresponding index for each DE-interleave shuffle. @@ -342,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore( // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - if (!isReInterleaveMask(SVI->getShuffleMask(), Factor)) + unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements(); + if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 2962f87..afd2406 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -436,8 +436,14 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { errs() << "WARNING: this target does not support the llvm." << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? 
"return" : "frame") << "address intrinsic.\n"; - CI->replaceAllUsesWith(ConstantPointerNull::get( - cast<PointerType>(CI->getType()))); + CI->replaceAllUsesWith( + ConstantPointerNull::get(cast<PointerType>(CI->getType()))); + break; + case Intrinsic::addressofreturnaddress: + errs() << "WARNING: this target does not support the " + "llvm.addressofreturnaddress intrinsic.\n"; + CI->replaceAllUsesWith( + ConstantPointerNull::get(cast<PointerType>(CI->getType()))); break; case Intrinsic::prefetch: diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 9eb43d2..26794e2 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -102,25 +101,12 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { }); } -MachineModuleInfo & -LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const { - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getMCRegisterInfo(), - getObjFileLowering()); - PM.add(MMI); - return *MMI; -} - -void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM, - MachineFunctionInitializer *MFInitializer) const { - PM.add(new MachineFunctionAnalysis(*this, MFInitializer)); -} - /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool DisableVerify, AnalysisID StartBefore, - AnalysisID StartAfter, AnalysisID StopAfter, + AnalysisID StartAfter, AnalysisID StopBefore, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer = nullptr) { // When in emulated TLS mode, add the LowerEmuTLS pass. @@ -135,7 +121,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); - PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter); + PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore, + StopAfter); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); @@ -150,8 +137,9 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PassConfig->addISelPrepare(); - MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM); - TM->addMachineFunctionAnalysis(PM, MFInitializer); + MachineModuleInfo *MMI = new MachineModuleInfo(TM); + MMI->setMachineFunctionInitializer(MFInitializer); + PM.add(MMI); // Enable FastISel with -fast, but allow that to be overridden. TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); @@ -165,6 +153,11 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, if (PassConfig->addIRTranslator()) return nullptr; + PassConfig->addPreLegalizeMachineIR(); + + if (PassConfig->addLegalizeMachineIR()) + return nullptr; + // Before running the register bank selector, ask the target if it // wants to run some passes. 
PassConfig->addPreRegBankSelect(); @@ -172,6 +165,21 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, if (PassConfig->addRegBankSelect()) return nullptr; + PassConfig->addPreGlobalInstructionSelect(); + + if (PassConfig->addGlobalInstructionSelect()) + return nullptr; + + // Pass to reset the MachineFunction if the ISel failed. + PM.add(createResetMachineFunctionPass( + PassConfig->reportDiagnosticWhenGlobalISelFallback())); + + // Provide a fallback path when we do not want to abort on + // not-yet-supported input. + if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) && + PassConfig->addInstSelector()) + return nullptr; + } else if (PassConfig->addInstSelector()) return nullptr; @@ -179,21 +187,22 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PassConfig->setInitialized(); - return &MMI.getContext(); + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile( PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { + AnalysisID StopBefore, AnalysisID StopAfter, + MachineFunctionInitializer *MFInitializer) { // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, - StopAfter, MFInitializer); + StopBefore, StopAfter, MFInitializer); if (!Context) return true; - if (StopAfter) { + if (StopBefore || StopAfter) { PM.add(createPrintMIRPass(Out)); return false; } @@ -219,7 +228,8 @@ bool LLVMTargetMachine::addPassesToEmitFile( MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, + Options.MCOptions); auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( *Context, std::move(FOut), Options.MCOptions.AsmVerbose, @@ -233,7 +243,8 @@ bool LLVMTargetMachine::addPassesToEmitFile( // emission fails. MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, + Options.MCOptions); if (!MCE || !MAB) return true; @@ -261,6 +272,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; PM.add(Printer); + PM.add(createFreeMachineFunctionPass()); return false; } @@ -275,7 +287,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, bool DisableVerify) { // Add common CodeGen passes. Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr, - nullptr); + nullptr, nullptr); if (!Ctx) return true; @@ -288,7 +300,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, + Options.MCOptions); if (!MCE || !MAB) return true; @@ -306,6 +319,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, return true; PM.add(Printer); + PM.add(createFreeMachineFunctionPass()); return false; // success! 
} diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index 4321849..86ef898 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -133,7 +133,7 @@ SUnit *LatencyPriorityQueue::pop() { void LatencyPriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + std::vector<SUnit *>::iterator I = find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index b810176..834ed5f 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -222,17 +222,13 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { LexicalScope *WS = WorkStack.back(); const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), - SE = Children.end(); - SI != SE; ++SI) { - LexicalScope *ChildScope = *SI; + for (auto &ChildScope : Children) if (!ChildScope->getDFSOut()) { WorkStack.push_back(ChildScope); visitedChildren = true; ChildScope->setDFSIn(++Counter); break; } - } if (!visitedChildren) { WorkStack.pop_back(); WS->setDFSOut(++Counter); @@ -247,10 +243,7 @@ void LexicalScopes::assignInstructionRanges( DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { LexicalScope *PrevLexicalScope = nullptr; - for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), - RE = MIRanges.end(); - RI != RE; ++RI) { - const InsnRange &R = *RI; + for (const auto &R : MIRanges) { LexicalScope *S = MI2ScopeMap.lookup(R.first); assert(S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) @@ -281,12 +274,8 @@ void LexicalScopes::getMachineBasicBlocks( } SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); - for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), - E = InsnRanges.end(); - I != E; ++I) { - InsnRange &R = *I; + for (auto &R : InsnRanges) MBBs.insert(R.first->getParent()); - } } /// dominates - Return true if DebugLoc's lexical scope dominates at least one @@ -301,9 +290,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { return true; bool Result = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - if (const DILocation *IDL = I->getDebugLoc()) + for (auto &I : *MBB) { + if (const DILocation *IDL = I.getDebugLoc()) if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) if (Scope->dominates(IScope)) return true; diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index 4ff88d5..c945376 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -60,6 +61,26 @@ class LiveDebugValues : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; + LexicalScopes LS; + + /// Keeps track of lexical 
scopes associated with a user value's source + /// location. + class UserValueScopes { + DebugLoc DL; + LexicalScopes &LS; + SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; + + public: + UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {} + + /// Return true if current scope dominates at least one machine + /// instruction in a given machine basic block. + bool dominates(MachineBasicBlock *MBB) { + if (LBlocks.empty()) + LS.getMachineBasicBlocks(DL, LBlocks); + return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); + } + }; /// Based on std::pair so it can be used as an index into a DenseMap. typedef std::pair<const DILocalVariable *, const DILocation *> @@ -83,7 +104,7 @@ private: struct VarLoc { const DebugVariable Var; const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. - + mutable UserValueScopes UVS; enum { InvalidKind = 0, RegisterKind } Kind; /// The value location. Stored separately to avoid repeatedly @@ -96,9 +117,9 @@ private: uint64_t Hash; } Loc; - VarLoc(const MachineInstr &MI) + VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI), - Kind(InvalidKind) { + UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); @@ -125,6 +146,10 @@ private: return 0; } + /// Determine whether the lexical scope of this value's debug location + /// dominates MBB. + bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); } + void dump() const { MI.dump(); } bool operator==(const VarLoc &Other) const { @@ -201,7 +226,8 @@ private: VarLocInMBB &OutLocs, VarLocMap &VarLocIDs); bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, - const VarLocMap &VarLocIDs); + const VarLocMap &VarLocIDs, + SmallPtrSet<const MachineBasicBlock *, 16> &Visited); bool ExtendRanges(MachineFunction &MF); @@ -217,7 +243,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } /// Print to ostream with a message. @@ -228,6 +254,7 @@ public: /// Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; }; + } // namespace //===----------------------------------------------------------------------===// @@ -260,6 +287,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, const VarLocMap &VarLocIDs, const char *msg, raw_ostream &Out) const { + Out << '\n' << msg << '\n'; for (const MachineBasicBlock &BB : MF) { const auto &L = V.lookup(&BB); Out << "MBB: " << BB.getName() << ":\n"; @@ -268,7 +296,6 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, Out << " Var: " << VL.Var.getVar()->getName(); Out << " MI: "; VL.dump(); - Out << "\n"; } } Out << "\n"; @@ -294,7 +321,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, // Add the VarLoc to OpenRanges from this DBG_VALUE. // TODO: Currently handles DBG_VALUE which has only reg as location. 
if (isDbgValueDescribedByReg(MI)) { - VarLoc VL(MI); + VarLoc VL(MI, LS); unsigned ID = VarLocIDs.insert(VL); OpenRanges.insert(ID, VL.Var); } @@ -368,7 +395,8 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same /// source variable in all the predecessors of @MBB reside in the same location. bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, - VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) { + VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, + SmallPtrSet<const MachineBasicBlock *, 16> &Visited) { DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); bool Changed = false; @@ -376,21 +404,39 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, // For all predecessors of this MBB, find the set of VarLocs that // can be joined. + int NumVisited = 0; for (auto p : MBB.predecessors()) { + // Ignore unvisited predecessor blocks. As we are processing + // the blocks in reverse post-order any unvisited block can + // be considered to not remove any incoming values. + if (!Visited.count(p)) + continue; auto OL = OutLocs.find(p); // Join is null in case of empty OutLocs from any of the pred. if (OL == OutLocs.end()) return false; - // Just copy over the Out locs to incoming locs for the first predecessor. - if (p == *MBB.pred_begin()) { + // Just copy over the Out locs to incoming locs for the first visited + // predecessor, and for all other predecessors join the Out locs. + if (!NumVisited) InLocsT = OL->second; - continue; - } - // Join with this predecessor. - InLocsT &= OL->second; + else + InLocsT &= OL->second; + NumVisited++; } + // Filter out DBG_VALUES that are out of scope. + VarLocSet KillSet; + for (auto ID : InLocsT) + if (!VarLocIDs[ID].dominates(MBB)) + KillSet.set(ID); + InLocsT.intersectWithComplement(KillSet); + + // As we are processing blocks in reverse post-order we + // should have processed at least one predecessor, unless it + // is the entry block which has no predecessor. + assert((NumVisited || MBB.pred_empty()) && + "Should have processed at least one predecessor"); if (InLocsT.empty()) return false; @@ -463,16 +509,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // To solve it, we perform join() and transfer() using the two worklist method // until the ranges converge. // Ranges have converged when both worklists are empty. + SmallPtrSet<const MachineBasicBlock *, 16> Visited; while (!Worklist.empty() || !Pending.empty()) { // We track what is on the pending worklist to avoid inserting the same // thing twice. We could avoid this with a custom priority queue, but this // is probably not worth it. SmallPtrSet<MachineBasicBlock *, 16> OnPending; + DEBUG(dbgs() << "Processing Worklist\n"); while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs); - + MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited); + Visited.insert(MBB); if (MBBJoined) { MBBJoined = false; Changed = true; @@ -505,12 +553,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + if (!MF.getFunction()->getSubprogram()) + // LiveDebugValues will already have removed all DBG_VALUEs. 
+ return false; + TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); + LS.initialize(MF); - bool Changed = false; - - Changed |= ExtendRanges(MF); - + bool Changed = ExtendRanges(MF); return Changed; } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 966b4f1..0934d8c 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -22,7 +22,6 @@ #include "LiveDebugVariables.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -76,27 +75,6 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullpt /// LocMap - Map of where a user value is live, and its location. typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; -namespace { -/// UserValueScopes - Keeps track of lexical scopes associated with a -/// user value's source location. -class UserValueScopes { - DebugLoc DL; - LexicalScopes &LS; - SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; - -public: - UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {} - - /// dominates - Return true if current scope dominates at least one machine - /// instruction in a given machine basic block. - bool dominates(MachineBasicBlock *MBB) { - if (LBlocks.empty()) - LS.getMachineBasicBlocks(DL, LBlocks); - return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); - } -}; -} // end anonymous namespace - /// UserValue - A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register @@ -221,8 +199,8 @@ public: I.setValue(getLocationNo(LocMO)); } - /// extendDef - Extend the current definition as far as possible down the - /// dominator tree. Stop when meeting an existing def or when leaving the live + /// extendDef - Extend the current definition as far as possible down. + /// Stop when meeting an existing def or when leaving the live /// range of VNI. /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. @@ -231,12 +209,10 @@ public: /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. - /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + LiveIntervals &LIS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -254,8 +230,7 @@ public: /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + LiveIntervals &LIS); /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. @@ -283,8 +258,6 @@ class LDVImpl { LocMap::Allocator allocator; MachineFunction *MF; LiveIntervals *LIS; - LexicalScopes LS; - MachineDominatorTree *MDT; const TargetRegisterInfo *TRI; /// Whether emitDebugValues is called. 
@@ -342,7 +315,6 @@ public: "Dbg values are not emitted in LDV"); EmitDone = false; ModifiedMF = false; - LS.reset(); } /// mapVirtReg - Map virtual register to an equivalence class. @@ -541,8 +513,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { /// data-flow analysis to propagate them beyond basic block boundaries. void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + LiveIntervals &LIS) { SlotIndex Start = Idx; MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); SlotIndex Stop = LIS.getMBBEndIdx(MBB); @@ -660,9 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - LiveIntervals &LIS, - MachineDominatorTree &MDT, - UserValueScopes &UVS) { + LiveIntervals &LIS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -677,7 +646,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, const MachineOperand &Loc = locations[LocNo]; if (!Loc.isReg()) { - extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS); + extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS); continue; } @@ -690,7 +659,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, VNI = LI->getVNInfoAt(Idx); } SmallVector<SlotIndex, 16> Kills; - extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS); if (LI) addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); continue; @@ -701,7 +670,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveRange *LR = &LIS.getRegUnit(Unit); const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, nullptr, LIS); } // Finally, erase all the undefs. @@ -714,8 +683,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, void LDVImpl::computeIntervals() { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); - userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS); userValues[i]->mapVirtRegs(this); } } @@ -724,9 +692,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { clear(); MF = &mf; LIS = &pass.getAnalysis<LiveIntervals>(); - MDT = &pass.getAnalysis<MachineDominatorTree>(); TRI = mf.getSubtarget().getRegisterInfo(); - LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -951,7 +917,7 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, while (!(MI = LIS.getInstructionFromIndex(Idx))) { // We've reached the beginning of MBB. 
if (Idx == Start) { - MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin()); + MachineBasicBlock::iterator I = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); return I; } Idx = Idx.getPrevIndex(); diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 93c5ca7..623af49 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -59,18 +59,32 @@ public: typedef LiveRange::Segment Segment; typedef IteratorT iterator; - VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { + /// A counterpart of LiveRange::createDeadDef: Make sure the range has a + /// value defined at @p Def. + /// If @p ForVNI is null, and there is no value defined at @p Def, a new + /// value will be allocated using @p VNInfoAllocator. + /// If @p ForVNI is null, the return value is the value defined at @p Def, + /// either a pre-existing one, or the one newly created. + /// If @p ForVNI is not null, then @p Def should be the location where + /// @p ForVNI is defined. If the range does not have a value defined at + /// @p Def, the value @p ForVNI will be used instead of allocating a new + /// one. If the range already has a value defined at @p Def, it must be + /// same as @p ForVNI. In either case, @p ForVNI will be the return value. + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator *VNInfoAllocator, + VNInfo *ForVNI) { assert(!Def.isDead() && "Cannot define a value at the dead slot"); - + assert((!ForVNI || ForVNI->def == Def) && + "If ForVNI is specified, it must match Def"); iterator I = impl().find(Def); if (I == segments().end()) { - VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator); impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } Segment *S = segmentAt(I); if (SlotIndex::isSameInstr(Def, S->start)) { + assert((!ForVNI || ForVNI == S->valno) && "Value number mismatch"); assert(S->valno->def == S->start && "Inconsistent existing value def"); // It is possible to have both normal and early-clobber defs of the same @@ -84,7 +98,7 @@ public: return S->valno; } assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def"); - VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + VNInfo *VNI = ForVNI ? 
ForVNI : LR->getNextValue(Def, *VNInfoAllocator); segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -93,7 +107,7 @@ public: if (segments().empty()) return nullptr; iterator I = - impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); + impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); if (I == segments().begin()) return nullptr; --I; @@ -104,6 +118,25 @@ public: return I->valno; } + std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs, + SlotIndex StartIdx, SlotIndex Use) { + if (segments().empty()) + return std::make_pair(nullptr, false); + SlotIndex BeforeUse = Use.getPrevSlot(); + iterator I = impl().findInsertPos(Segment(BeforeUse, Use, nullptr)); + if (I == segments().begin()) + return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse)); + --I; + if (I->end <= StartIdx) + return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse)); + if (I->end < Use) { + if (LR->isUndefIn(Undefs, I->end, BeforeUse)) + return std::make_pair(nullptr, true); + extendSegmentEndTo(I, Use); + } + return std::make_pair(I->valno, false); + } + /// This method is used when we want to extend the segment specified /// by I to end at the specified endpoint. To do this, we should /// merge and eliminate all segments that this will overlap @@ -320,13 +353,20 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) { return I; } -VNInfo *LiveRange::createDeadDef(SlotIndex Def, - VNInfo::Allocator &VNInfoAllocator) { +VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).createDeadDef(Def, &VNIAlloc, nullptr); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).createDeadDef(Def, &VNIAlloc, nullptr); +} + +VNInfo *LiveRange::createDeadDef(VNInfo *VNI) { // Use the segment set, if it is available. if (segmentSet != nullptr) - return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator); + return CalcLiveRangeUtilSet(this).createDeadDef(VNI->def, nullptr, VNI); // Otherwise use the segment vector. - return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator); + return CalcLiveRangeUtilVector(this).createDeadDef(VNI->def, nullptr, VNI); } // overlaps - Return true if the intersection of the two live ranges is @@ -507,9 +547,15 @@ void LiveRange::append(const Segment S) { segments.push_back(S); } -/// extendInBlock - If this range is live before Kill in the basic -/// block that starts at StartIdx, extend it to be live up to Kill and return -/// the value. If there is no live range before Kill, return NULL. +std::pair<VNInfo*,bool> LiveRange::extendInBlock(ArrayRef<SlotIndex> Undefs, + SlotIndex StartIdx, SlotIndex Kill) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).extendInBlock(Undefs, StartIdx, Kill); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).extendInBlock(Undefs, StartIdx, Kill); +} + VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { // Use the segment set, if it is available. if (segmentSet != nullptr) @@ -571,7 +617,7 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, /// Also remove the value# from value# list. 
void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + segments.erase(remove_if(*this, [ValNo](const Segment &S) { return S.valno == ValNo; }), end()); // Now that ValNo is dead, remove it. @@ -824,6 +870,30 @@ unsigned LiveInterval::getSize() const { return Sum; } +void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs, + LaneBitmask LaneMask, + const MachineRegisterInfo &MRI, + const SlotIndexes &Indexes) const { + assert(TargetRegisterInfo::isVirtualRegister(reg)); + LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg); + assert((VRegMask & LaneMask).any()); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (const MachineOperand &MO : MRI.def_operands(reg)) { + if (!MO.isUndef()) + continue; + unsigned SubReg = MO.getSubReg(); + assert(SubReg != 0 && "Undef should only be set on subreg defs"); + LaneBitmask DefMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask UndefMask = VRegMask & ~DefMask; + if ((UndefMask & LaneMask).any()) { + const MachineInstr &MI = *MO.getParent(); + bool EarlyClobber = MO.isEarlyClobber(); + SlotIndex Pos = Indexes.getInstructionIndex(MI).getRegSlot(EarlyClobber); + Undefs.push_back(Pos); + } + } +} + raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')'; } @@ -912,15 +982,16 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { super::verify(); // Make sure SubRanges are fine and LaneMasks are disjunct. - LaneBitmask Mask = 0; - LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; + LaneBitmask Mask; + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) + : LaneBitmask::getAll(); for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjunct to any previous subrange masks. - assert((Mask & SR.LaneMask) == 0); + assert((Mask & SR.LaneMask).none()); Mask |= SR.LaneMask; // subrange mask should not contained in maximum lane mask for the vreg. - assert((Mask & ~MaxMask) == 0); + assert((Mask & ~MaxMask).none()); // empty subranges must be removed. 
assert(!SR.empty()); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 5f3281f..70d3483 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -58,10 +58,6 @@ static cl::opt<bool> EnablePrecomputePhysRegs( static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG -static cl::opt<bool> EnableSubRegLiveness( - "enable-subreg-liveness", cl::Hidden, cl::init(true), - cl::desc("Enable subregister liveness tracking.")); - namespace llvm { cl::opt<bool> UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), @@ -119,9 +115,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); - if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness()) - MRI->enableSubRegLiveness(true); - if (!LRCalc) LRCalc = new LiveRangeCalc(); @@ -504,8 +497,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, return MayHaveSplitComponents; } -void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) -{ +void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { DEBUG(dbgs() << "Shrink: " << SR << '\n'); assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only shrink virtual registers"); @@ -514,18 +506,19 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) // Visit all instructions reading Reg. SlotIndex LastIdx; - for (MachineOperand &MO : MRI->reg_operands(Reg)) { - MachineInstr *UseMI = MO.getParent(); - if (UseMI->isDebugValue()) + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + // Skip "undef" uses. + if (!MO.readsReg()) continue; // Maybe the operand is for a subregister we don't care about. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); - if ((LaneMask & SR.LaneMask) == 0) + if ((LaneMask & SR.LaneMask).none()) continue; } // We only need to visit each instruction once. + MachineInstr *UseMI = MO.getParent(); SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot(); if (Idx == LastIdx) continue; @@ -574,11 +567,12 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) } void LiveIntervals::extendToIndices(LiveRange &LR, - ArrayRef<SlotIndex> Indices) { + ArrayRef<SlotIndex> Indices, + ArrayRef<SlotIndex> Undefs) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (unsigned i = 0, e = Indices.size(); i != e; ++i) - LRCalc->extend(LR, Indices[i]); + LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs); } void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, @@ -605,7 +599,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, // Find all blocks that are reachable from KillMBB without leaving VNI's live // range. It is possible that KillMBB itself is reachable, so start a DFS // from each successor. - typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy; + typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy; VisitedTy Visited; for (MachineBasicBlock::succ_iterator SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); @@ -724,7 +718,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { LaneBitmask DefinedLanesMask; if (!SRs.empty()) { // Compute a mask of lanes that are defined. 
- DefinedLanesMask = 0; + DefinedLanesMask = LaneBitmask::getNone(); for (auto &SRP : SRs) { const LiveInterval::SubRange &SR = *SRP.first; LiveRange::const_iterator &I = SRP.second; @@ -737,7 +731,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { DefinedLanesMask |= SR.LaneMask; } } else - DefinedLanesMask = ~0u; + DefinedLanesMask = LaneBitmask::getAll(); bool IsFullWrite = false; for (const MachineOperand &MO : MI->operands()) { @@ -746,7 +740,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (MO.isUse()) { // Reading any undefined lanes? LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); - if ((UseMask & ~DefinedLanesMask) != 0) + if ((UseMask & ~DefinedLanesMask).any()) goto CancelKill; } else if (MO.getSubReg() == 0) { // Writing to the full register? @@ -954,14 +948,15 @@ public: LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); - LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI.getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &S : LI.subranges()) { - if ((S.LaneMask & LaneMask) == 0) + if ((S.LaneMask & LaneMask).none()) continue; updateRange(S, Reg, S.LaneMask); } } - updateRange(LI, Reg, 0); + updateRange(LI, Reg, LaneBitmask::getNone()); continue; } @@ -969,7 +964,7 @@ public: // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) if (LiveRange *LR = getRegUnitLI(*Units)) - updateRange(*LR, *Units, 0); + updateRange(*LR, *Units, LaneBitmask::getNone()); } if (hasRegMask) updateRegMaskSlots(); @@ -985,7 +980,7 @@ private: dbgs() << " "; if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << PrintReg(Reg); - if (LaneMask != 0) + if (LaneMask.any()) dbgs() << " L" << PrintLaneMask(LaneMask); } else { dbgs() << PrintRegUnit(Reg, &TRI); @@ -1039,6 +1034,8 @@ private: LiveRange::iterator Prev = std::prev(NewIdxIn); Prev->end = NewIdx.getRegSlot(); } + // Extend OldIdxIn. + OldIdxIn->end = Next->start; return; } @@ -1317,8 +1314,8 @@ private: if (MO.isUndef()) continue; unsigned SubReg = MO.getSubReg(); - if (SubReg != 0 && LaneMask != 0 - && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0) + if (SubReg != 0 && LaneMask.any() + && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask).none()) continue; const MachineInstr &MI = *MO.getParent(); @@ -1394,6 +1391,11 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, LaneBitmask LaneMask) { LiveInterval::iterator LII = LR.find(endIdx); SlotIndex lastUseIdx; + if (LII == LR.begin()) { + // This happens when the function is called for a subregister that only + // occurs _after_ the range that is to be repaired. + return; + } if (LII != LR.end() && LII->start < endIdx) lastUseIdx = LII->end; else @@ -1420,7 +1422,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, unsigned SubReg = MO.getSubReg(); LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); - if ((Mask & LaneMask) == 0) + if ((Mask & LaneMask).none()) continue; if (MO.isDef()) { @@ -1538,15 +1540,19 @@ void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { } void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + // LI may not have the main range computed yet, but its subranges may + // be present. 
VNInfo *VNI = LI.getVNInfoAt(Pos); - if (VNI == nullptr) - return; - LI.removeValNo(VNI); + if (VNI != nullptr) { + assert(VNI->def.getBaseIndex() == Pos.getBaseIndex()); + LI.removeValNo(VNI); + } - // Also remove the value in subranges. + // Also remove the value defined in subranges. for (LiveInterval::SubRange &S : LI.subranges()) { if (VNInfo *SVNI = S.getVNInfoAt(Pos)) - S.removeValNo(SVNI); + if (SVNI->def.getBaseIndex() == Pos.getBaseIndex()) + S.removeValNo(SVNI); } LI.removeEmptySubRanges(); } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 025d99c..fc2f233 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -102,9 +103,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { - SmallVectorImpl<LiveInterval*>::const_iterator I = - std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg); - return I != InterferingVRegs.end(); + return is_contained(InterferingVRegs, VirtReg); } // Collect virtual registers in this union that interfere with this diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 4e2528f..dcc41c1 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -49,7 +49,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { if (!O->isDef()) continue; unsigned Reg = O->getReg(); - if (Reg == 0) + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; removeReg(Reg); } else if (O->isRegMask()) @@ -61,7 +61,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { if (!O->isReg() || !O->readsReg()) continue; unsigned Reg = O->getReg(); - if (Reg == 0) + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; addReg(Reg); } @@ -77,7 +77,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { unsigned Reg = O->getReg(); - if (Reg == 0) + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (O->isDef()) { // Note, dead defs are still recorded. The caller should decide how to @@ -141,9 +141,19 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI, } /// Add live-in registers of basic block \p MBB to \p LiveRegs. -static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { - for (const auto &LI : MBB.liveins()) - LiveRegs.addReg(LI.PhysReg); +void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { + for (const auto &LI : MBB.liveins()) { + MCSubRegIndexIterator S(LI.PhysReg, TRI); + if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) { + addReg(LI.PhysReg); + continue; + } + for (; S.isValid(); ++S) { + unsigned SI = S.getSubRegIndex(); + if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any()) + addReg(S.getSubReg()); + } + } } /// Add pristine registers to the given \p LiveRegs. 
This function removes @@ -160,12 +170,12 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { // To get the live-outs we simply merge the live-ins of all successors. for (const MachineBasicBlock *Succ : MBB.successors()) - ::addLiveIns(*this, *Succ); + addBlockLiveIns(*Succ); } void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) { if (MBB.isReturnBlock()) { // The return block has no successors whose live-ins we could merge @@ -182,8 +192,31 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) addPristines(*this, MF, MFI, *TRI); - ::addLiveIns(*this, MBB); + addBlockLiveIns(MBB); +} + +void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, + MachineBasicBlock &MBB) { + assert(MBB.livein_empty()); + LiveRegs.init(TRI); + LiveRegs.addLiveOutsNoPristines(MBB); + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + LiveRegs.stepBackward(MI); + + for (unsigned Reg : LiveRegs) { + // Skip the register if we are about to add one of its super registers. + bool ContainsSuperReg = false; + for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) { + if (LiveRegs.contains(*SReg)) { + ContainsSuperReg = true; + break; + } + } + if (ContainsSuperReg) + continue; + MBB.addLiveIn(Reg); + } } diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index db91ca1..0128376 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LiveRangeCalc.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,6 +24,7 @@ void LiveRangeCalc::resetLiveOutMap() { unsigned NumBlocks = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(NumBlocks); + EntryInfoMap.clear(); Map.resize(NumBlocks); } @@ -64,9 +66,8 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { unsigned SubReg = MO.getSubReg(); if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { - LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) - : MRI->getMaxLaneMaskForVReg(Reg); - + LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); // If this is the first time we see a subregister def, initialize // subranges by creating a copy of the main range. if (!LI.hasSubRanges() && !LI.empty()) { @@ -74,17 +75,19 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { LI.createSubRangeFrom(*Alloc, ClassMask, LI); } + LaneBitmask Mask = SubMask; for (LiveInterval::SubRange &S : LI.subranges()) { // A Mask for subregs common to the existing subrange and current def. LaneBitmask Common = S.LaneMask & Mask; - if (Common == 0) + if (Common.none()) continue; - // A Mask for subregs covered by the subrange but not the current def. 
- LaneBitmask LRest = S.LaneMask & ~Mask; LiveInterval::SubRange *CommonRange; - if (LRest != 0) { - // Split current subrange into Common and LRest ranges. - S.LaneMask = LRest; + // A Mask for subregs covered by the subrange but not the current def. + LaneBitmask RM = S.LaneMask & ~Mask; + if (RM.any()) { + // Split the subrange S into two parts: one covered by the current + // def (CommonRange), and the one not affected by it (updated S). + S.LaneMask = RM; CommonRange = LI.createSubRangeFrom(*Alloc, Common, S); } else { assert(Common == S.LaneMask); @@ -95,7 +98,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { Mask &= ~Common; } // Create a new SubRange for subregs we did not cover yet. - if (Mask != 0) { + if (Mask.any()) { LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask); if (MO.isDef()) createDeadDef(*Indexes, *Alloc, *NewRange, MO); @@ -116,14 +119,15 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { // necessary. if (LI.hasSubRanges()) { for (LiveInterval::SubRange &S : LI.subranges()) { - resetLiveOutMap(); - extendToUses(S, Reg, S.LaneMask); + LiveRangeCalc SubLRC; + SubLRC.reset(MF, Indexes, DomTree, Alloc); + SubLRC.extendToUses(S, Reg, S.LaneMask, &LI); } LI.clear(); constructMainRangeFromSubranges(LI); } else { resetLiveOutMap(); - extendToUses(LI, Reg, ~0u); + extendToUses(LI, Reg, LaneBitmask::getAll()); } } @@ -139,9 +143,8 @@ void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) { MainRange.createDeadDef(VNI->def, *Alloc); } } - resetLiveOutMap(); - extendToUses(MainRange, LI.reg); + extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI); } void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { @@ -154,29 +157,34 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { } -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, - LaneBitmask Mask) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, + LiveInterval *LI) { + SmallVector<SlotIndex, 4> Undefs; + if (LI != nullptr) + LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes); + // Visit all operands that read Reg. This may include partial defs. + bool IsSubRange = !Mask.all(); const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { // Clear all kill flags. They will be reinserted after register allocation // by LiveIntervalAnalysis::addKillFlags(). if (MO.isUse()) MO.setIsKill(false); - else { - // We only care about uses, but on the main range (mask ~0u) this includes - // the "virtual" reads happening for subregister defs. - if (Mask != ~0u) - continue; - } - - if (!MO.readsReg()) + // MO::readsReg returns "true" for subregister defs. This is for keeping + // liveness of the entire register (i.e. for the main range of the live + // interval). For subranges, definitions of non-overlapping subregisters + // do not count as uses. + if (!MO.readsReg() || (IsSubRange && MO.isDef())) continue; + unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); - // Ignore uses not covering the current subrange. - if ((SubRegMask & Mask) == 0) + LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg); + if (MO.isDef()) + SLM = ~SLM; + // Ignore uses not reading the current (sub)range. + if ((SLM & Mask).none()) continue; } @@ -205,7 +213,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, // MI is reading Reg. 
We may have visited MI before if it happens to be // reading Reg multiple times. That is OK, extend() is idempotent. - extend(LR, UseIdx, Reg); + extend(LR, UseIdx, Reg, Undefs); } } @@ -235,8 +243,8 @@ void LiveRangeCalc::updateFromLiveIns() { LiveIn.clear(); } - -void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs) { assert(Use.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -245,14 +253,15 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { assert(UseMBB && "No MBB at Use"); // Is there a def in the same MBB we can extend? - if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use)) + auto EP = LR.extendInBlock(Undefs, Indexes->getMBBStartIdx(UseMBB), Use); + if (EP.first != nullptr || EP.second) return; // Find the single reaching def, or determine if Use is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LR, *UseMBB, Use, PhysReg)) + if (findReachingDefs(LR, *UseMBB, Use, PhysReg, Undefs)) return; // When there were multiple different values, we may need new PHIs. @@ -271,8 +280,72 @@ void LiveRangeCalc::calculateValues() { } +bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, + MachineBasicBlock &MBB, BitVector &DefOnEntry, + BitVector &UndefOnEntry) { + unsigned BN = MBB.getNumber(); + if (DefOnEntry[BN]) + return true; + if (UndefOnEntry[BN]) + return false; + + auto MarkDefined = + [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool { + for (MachineBasicBlock *S : B.successors()) + DefOnEntry[S->getNumber()] = true; + DefOnEntry[BN] = true; + return true; + }; + + SetVector<unsigned> WorkList; + // Checking if the entry of MBB is reached by some def: add all predecessors + // that are potentially defined-on-exit to the work list. + for (MachineBasicBlock *P : MBB.predecessors()) + WorkList.insert(P->getNumber()); + + for (unsigned i = 0; i != WorkList.size(); ++i) { + // Determine if the exit from the block is reached by some def. + unsigned N = WorkList[i]; + MachineBasicBlock &B = *MF->getBlockNumbered(N); + if (Seen[N] && Map[&B].first != nullptr) + return MarkDefined(B); + SlotIndex Begin, End; + std::tie(Begin, End) = Indexes->getMBBRange(&B); + LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End); + if (UB != LR.begin()) { + LiveRange::Segment &Seg = *std::prev(UB); + if (Seg.end > Begin) { + // There is a segment that overlaps B. If the range is not explicitly + // undefined between the end of the segment and the end of the block, + // treat the block as defined on exit. If it is, go to the next block + // on the work list. + if (LR.isUndefIn(Undefs, Seg.end, End)) + continue; + return MarkDefined(B); + } + } + + // No segment overlaps with this block. If this block is not defined on + // entry, or it undefines the range, do not process its predecessors. + if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) { + UndefOnEntry[N] = true; + continue; + } + if (DefOnEntry[N]) + return MarkDefined(B); + + // Still don't know: add all predecessors to the work list. 
+ for (MachineBasicBlock *P : B.predecessors()) + WorkList.insert(P->getNumber()); + } + + UndefOnEntry[BN] = true; + return false; +} + bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Use, unsigned PhysReg) { + SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs) { unsigned UseMBBNum = UseMBB.getNumber(); // Block numbers where LR should be live-in. @@ -282,6 +355,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, bool UniqueVNI = true; VNInfo *TheVNI = nullptr; + bool FoundUndef = false; + // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); @@ -294,18 +369,20 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); if (MI != nullptr) errs() << Use << " " << *MI; - llvm_unreachable("Use not jointly dominated by defs."); + report_fatal_error("Use not jointly dominated by defs."); } if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); - errs() << "The register " << PrintReg(PhysReg) + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + errs() << "The register " << PrintReg(PhysReg, TRI) << " needs to be live in to BB#" << MBB->getNumber() << ", but is missing from the live-in list.\n"; - llvm_unreachable("Invalid global physical register"); + report_fatal_error("Invalid global physical register"); } #endif + FoundUndef |= MBB->pred_empty(); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { @@ -326,18 +403,21 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. - VNInfo *VNI = LR.extendInBlock(Start, End); + auto EP = LR.extendInBlock(Undefs, Start, End); + VNInfo *VNI = EP.first; + FoundUndef |= EP.second; setLiveOutValue(Pred, VNI); if (VNI) { if (TheVNI && TheVNI != VNI) UniqueVNI = false; TheVNI = VNI; - continue; } + if (VNI || EP.second) + continue; // No, we need a live-in value for Pred as well if (Pred != &UseMBB) - WorkList.push_back(Pred->getNumber()); + WorkList.push_back(Pred->getNumber()); else // Loopback to UseMBB, so value is really live through. Use = SlotIndex(); @@ -345,6 +425,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, } LiveIn.clear(); + FoundUndef |= (TheVNI == nullptr); + if (Undefs.size() > 0 && FoundUndef) + UniqueVNI = false; // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but // neither require it. Skip the sorting overhead for small updates. @@ -353,27 +436,39 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { + assert(TheVNI != nullptr); LiveRangeUpdater Updater(&LR); - for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), - E = WorkList.end(); I != E; ++I) { - SlotIndex Start, End; - std::tie(Start, End) = Indexes->getMBBRange(*I); - // Trim the live range in UseMBB. 
- if (*I == UseMBBNum && Use.isValid()) - End = Use; - else - Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); - Updater.add(Start, End, TheVNI); + for (unsigned BN : WorkList) { + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(BN); + // Trim the live range in UseMBB. + if (BN == UseMBBNum && Use.isValid()) + End = Use; + else + Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr); + Updater.add(Start, End, TheVNI); } return true; } + // Prepare the defined/undefined bit vectors. + auto EF = EntryInfoMap.find(&LR); + if (EF == EntryInfoMap.end()) { + unsigned N = MF->getNumBlockIDs(); + EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first; + EF->second.first.resize(N); + EF->second.second.resize(N); + } + BitVector &DefOnEntry = EF->second.first; + BitVector &UndefOnEntry = EF->second.second; + // Multiple values were found, so transfer the work list to the LiveIn array // where UpdateSSA will use it as a work list. LiveIn.reserve(WorkList.size()); - for (SmallVectorImpl<unsigned>::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { - MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + for (unsigned BN : WorkList) { + MachineBasicBlock *MBB = MF->getBlockNumbered(BN); + if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) + continue; addLiveInBlock(LR, DomTree->getNode(MBB)); if (MBB == &UseMBB) LiveIn.back().Kill = Use; @@ -458,10 +553,12 @@ void LiveRangeCalc::updateSSA() { I.DomNode = nullptr; // Add liveness since updateFromLiveIns now skips this node. - if (I.Kill.isValid()) - LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); - else { - LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + if (I.Kill.isValid()) { + if (VNI) + LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); + } else { + if (VNI) + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 9de48b7..1a7598f 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -22,6 +22,7 @@ #ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H #define LLVM_LIB_CODEGEN_LIVERANGECALC_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/LiveInterval.h" @@ -53,6 +54,19 @@ class LiveRangeCalc { /// when switching live ranges. BitVector Seen; + /// Map LiveRange to sets of blocks (represented by bit vectors) that + /// in the live range are defined on entry and undefined on entry. + /// A block is defined on entry if there is a path from at least one of + /// the defs in the live range to the entry of the block, and conversely, + /// a block is undefined on entry, if there is no such path (i.e. no + /// definition reaches the entry of the block). A single LiveRangeCalc + /// object is used to track live-out information for multiple registers + /// in live range splitting (which is ok, since the live ranges of these + /// registers do not overlap), but the defined/undefined information must + /// be kept separate for each individual range. + /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. + std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap; + /// Map each basic block where a live range is live out to the live-out value /// and its defining block. 
/// @@ -101,18 +115,31 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector<LiveInBlock, 16> LiveIn; - /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can - /// reach it. + /// Check if the entry to block @p MBB can be reached by any of the defs + /// in @p LR. Return true if none of the defs reach the entry to @p MBB. + bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, + MachineBasicBlock &MBB, BitVector &DefOnEntry, + BitVector &UndefOnEntry); + + /// Find the set of defs that can reach @p Kill. @p Kill must belong to + /// @p UseMBB. /// - /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB - /// are added to @p LR, and the function returns true. + /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill, + /// all paths from the def to @p UseMBB are added to @p LR, and the function + /// returns true. /// /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be /// live in are added to the LiveIn array, and the function returns false. /// + /// The array @p Undef provides the locations where the range @p LR becomes + /// undefined by <def,read-undef> operands on other subranges. If @p Undef + /// is non-empty and @p Kill is jointly dominated only by the entries of + /// @p Undef, the function returns false. + /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Kill, unsigned PhysReg); + SlotIndex Kill, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -127,9 +154,16 @@ class LiveRangeCalc { /// Extend the live range of @p LR to reach all uses of Reg. /// - /// All uses must be jointly dominated by existing liveness. PHI-defs are - /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); + /// If @p LR is a main range, or if @p LI is null, then all uses must be + /// jointly dominated by the definitions from @p LR. If @p LR is a subrange + /// of the live interval @p LI, corresponding to lane mask @p LaneMask, + /// all uses must be jointly dominated by the definitions from @p LR + /// together with definitions of other lanes where @p LR becomes undefined + /// (via <def,read-undef> operands). + /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e. + /// LaneBitmask::getAll(). + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask, + LiveInterval *LI = nullptr); /// Reset Map and Seen fields. void resetLiveOutMap(); @@ -169,7 +203,8 @@ public: /// inserted as required to preserve SSA form. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding @@ -181,7 +216,7 @@ public: /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. void extendToUses(LiveRange &LR, unsigned PhysReg) { - extendToUses(LR, PhysReg, ~0u); + extendToUses(LR, PhysReg, LaneBitmask::getAll()); } /// Calculates liveness for the register specified in live interval @p LI. 
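To show how the pieces added in the LiveInterval, LiveIntervalAnalysis and LiveRangeCalc hunks above compose, here is a hedged sketch of a caller extending a subrange while honoring <def,read-undef> points on other lanes. computeSubRangeUndefs and the Undefs-aware extendToIndices are the entry points introduced above; the wrapper function itself, its name, and the include list are assumptions made for illustration only:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
using namespace llvm;

// Extend subrange S of LI so it is live at every index in UseIdx.  The slots
// where other subregisters are written with <def,read-undef> are collected
// first, so liveness is not propagated across points where S's lanes become
// undefined.
static void extendSubRangeToUses(LiveIntervals &LIS,
                                 const MachineRegisterInfo &MRI,
                                 LiveInterval &LI, LiveInterval::SubRange &S,
                                 ArrayRef<SlotIndex> UseIdx) {
  SmallVector<SlotIndex, 4> Undefs;
  LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes());
  LIS.extendToIndices(S, UseIdx, Undefs);
}

Internally, extendToIndices forwards Undefs to LiveRangeCalc::extend, which in turn consults the per-range defined/undefined-on-entry bit vectors (EntryInfoMap) when deciding whether a use is jointly dominated by real definitions.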
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index b35c0ad..7f1c69c 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -37,6 +37,13 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } LiveInterval &LI = LIS.createEmptyInterval(VReg); + // Create empty subranges if the OldReg's interval has them. Do not create + // the main range here---it will be constructed later after the subranges + // have been finalized. + LiveInterval &OldLI = LIS.getInterval(OldReg); + VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &S : OldLI.subranges()) + LI.createSubRange(Alloc, S.LaneMask); return LI; } @@ -66,6 +73,8 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { unsigned Original = VRM->getOriginal(getReg()); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + if (!OrigVNI) + continue; MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; @@ -94,7 +103,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, // We can't remat physreg uses, unless it is a constant. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) + if (MRI.isConstantPhysReg(MO.getReg())) continue; return false; } @@ -227,7 +236,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, unsigned SubReg = MO.getSubReg(); LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (const LiveInterval::SubRange &S : LI.subranges()) { - if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) + if ((S.LaneMask & LaneMask).any() && S.Query(Idx).isKill()) return true; } return false; @@ -263,7 +272,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, bool ReadsPhysRegs = false; bool isOrigDef = false; unsigned Dest; - if (VRM && MI->getOperand(0).isReg()) { + // Only optimize rematerialize case when the instruction has one def, since + // otherwise we could leave some dead defs in the code. This case is + // extremely rare. + if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + MI->getDesc().getNumDefs() == 1) { Dest = MI->getOperand(0).getReg(); unsigned Original = VRM->getOriginal(Dest); LiveInterval &OrigLI = LIS.getInterval(Original); @@ -335,6 +348,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // allocations of the func are done. 
if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest); + NewLI.removeEmptySubRanges(); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); pop_back(); @@ -428,6 +442,9 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) if (VRM) VRM->grow(); + if (Parent && !Parent->isSpillable()) + LIS.getInterval(VReg).markNotSpillable(); + NewRegs.push_back(VReg); } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 7ee87c1..7a51386 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -70,15 +70,16 @@ void LiveRegMatrix::releaseMemory() { } } -template<typename Callable> -bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, - unsigned PhysReg, Callable Func) { +template <typename Callable> +static bool foreachUnit(const TargetRegisterInfo *TRI, + LiveInterval &VRegInterval, unsigned PhysReg, + Callable Func) { if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = (*Units).first; LaneBitmask Mask = (*Units).second; for (LiveInterval::SubRange &S : VRegInterval.subranges()) { - if (S.LaneMask & Mask) { + if ((S.LaneMask & Mask).any()) { if (Func(Unit, S)) return true; break; diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index dd87216..269b990a31 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -643,7 +643,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). MachineBasicBlock *Entry = &MF->front(); - SmallPtrSet<MachineBasicBlock*,16> Visited; + df_iterator_default_set<MachineBasicBlock*,16> Visited; for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { runOnBlock(MBB, NumRegs); diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index af7392f..e189fb0 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -51,12 +51,21 @@ namespace { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced int FrameIdx; // The frame index + + // Order reference instruction appears in program. Used to ensure + // deterministic order when multiple instructions may reference the same + // location. 
+ unsigned Order; + public: - FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) : - MI(I), LocalOffset(Offset), FrameIdx(Idx) {} + FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) : + MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {} + bool operator<(const FrameRef &RHS) const { - return LocalOffset < RHS.LocalOffset; + return std::tie(LocalOffset, FrameIdx, Order) < + std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order); } + MachineBasicBlock::iterator getMachineInstr() const { return MI; } int64_t getLocalOffset() const { return LocalOffset; } int getFrameIndex() const { return FrameIdx; } @@ -67,17 +76,17 @@ namespace { /// StackObjSet - A set of stack object indexes typedef SmallSetVector<int, 8> StackObjSet; - void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, + void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo *MFI, bool StackGrowsDown, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); public: static char ID; // Pass identification, replacement for typeid - explicit LocalStackSlotPass() : MachineFunctionPass(ID) { + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -102,9 +111,9 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + unsigned LocalObjectCount = MFI.getObjectIndexEnd(); // If the target doesn't want/need this pass, or if there are no locals // to consider, early exit. @@ -112,7 +121,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { return true; // Make sure we have enough space to store the local offsets. - LocalOffsets.resize(MFI->getObjectIndexEnd()); + LocalOffsets.resize(MFI.getObjectIndexEnd()); // Lay out the local blob. calculateFrameObjectOffsets(MF); @@ -125,21 +134,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { // Otherwise, PEI can do a bit better job of getting the alignment right // without a hole at the start since it knows the alignment of the stack // at the start of local allocation, and this pass doesn't. - MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + MFI.setUseLocalStackAllocationBlock(UsedBaseRegs); return true; } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. -void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. 
if (StackGrowsDown) - Offset += MFI->getObjectSize(FrameIdx); + Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI.getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -154,10 +163,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, // Keep the offset available for base register allocation LocalOffsets[FrameIdx] = LocalOffset; // And tell MFI about it for PEI to use later - MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + MFI.mapLocalFrameObject(FrameIdx, LocalOffset); if (!StackGrowsDown) - Offset += MFI->getObjectSize(FrameIdx); + Offset += MFI.getObjectSize(FrameIdx); ++NumAllocations; } @@ -166,7 +175,7 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, /// those required to be close to the Stack Protector) to stack offsets. void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo *MFI, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { @@ -183,7 +192,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs /// void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -194,22 +203,22 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; - if (MFI->getStackProtectorIndex() >= 0) { + if (MFI.getStackProtectorIndex() >= 0) { StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset, StackGrowsDown, MaxAlign); // Assign large stack objects first. - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i) + if (MFI.getStackProtectorIndex() == (int)i) continue; - switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { case StackProtector::SSPLK_None: continue; case StackProtector::SSPLK_SmallArray: @@ -235,10 +244,10 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. 
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i) + if (MFI.getStackProtectorIndex() == (int)i) continue; if (ProtectedObjs.count(i)) continue; @@ -247,8 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Remember how big this blob of stack space is - MFI->setLocalFrameSize(Offset); - MFI->setLocalFrameMaxAlign(MaxAlign); + MFI.setLocalFrameSize(Offset); + MFI.setLocalFrameMaxAlign(MaxAlign); } static inline bool @@ -273,7 +282,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // and ask the target to create a defining instruction for it. bool UsedBaseReg = false; - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = @@ -285,6 +294,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; + unsigned Order = 0; + for (MachineBasicBlock &BB : Fn) { for (MachineInstr &MI : BB) { // Debug value, stackmap and patchpoint instructions can't be out of @@ -305,21 +316,22 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // an object allocated in the local block. if (MI.getOperand(i).isFI()) { // Don't try this with values not in the local block. - if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex())) + if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex())) break; int Idx = MI.getOperand(i).getIndex(); int64_t LocalOffset = LocalOffsets[Idx]; if (!TRI->needsFrameBaseReg(&MI, LocalOffset)) break; - FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx)); + FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++)); break; } } } } - // Sort the frame references by local offset - array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + // Sort the frame references by local offset. + // Use frame index as a tie-breaker in case MI's have the same offset. + std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); MachineBasicBlock *Entry = &Fn.front(); @@ -332,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineInstr &MI = *FR.getMachineInstr(); int64_t LocalOffset = FR.getLocalOffset(); int FrameIdx = FR.getFrameIndex(); - assert(MFI->isObjectPreAllocated(FrameIdx) && + assert(MFI.isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); DEBUG(dbgs() << "Considering: " << MI); @@ -349,7 +361,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { assert(idx < MI.getNumOperands() && "Cannot find FI operand"); int64_t Offset = 0; - int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0; + int64_t FrameSizeAdjust = StackGrowsDown ? 
MFI.getLocalFrameSize() : 0; DEBUG(dbgs() << " Replacing FI in: " << MI); diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp new file mode 100644 index 0000000..d74b730 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp @@ -0,0 +1,71 @@ +//===-- llvm/CodeGen/GlobalISel/LowLevelType.cpp --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the more header-heavy bits of the LLT class to +/// avoid polluting users' namespaces. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +LLT::LLT(Type &Ty, const DataLayout &DL) { + if (auto VTy = dyn_cast<VectorType>(&Ty)) { + SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits(); + ElementsOrAddrSpace = VTy->getNumElements(); + Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; + } else if (auto PTy = dyn_cast<PointerType>(&Ty)) { + Kind = Pointer; + SizeInBits = DL.getTypeSizeInBits(&Ty); + ElementsOrAddrSpace = PTy->getAddressSpace(); + } else if (Ty.isSized()) { + // Aggregates are no different from real scalars as far as GlobalISel is + // concerned. + Kind = Scalar; + SizeInBits = DL.getTypeSizeInBits(&Ty); + ElementsOrAddrSpace = 1; + assert(SizeInBits != 0 && "invalid zero-sized type"); + } else { + Kind = Invalid; + SizeInBits = ElementsOrAddrSpace = 0; + } +} + +LLT::LLT(MVT VT) { + if (VT.isVector()) { + SizeInBits = VT.getVectorElementType().getSizeInBits(); + ElementsOrAddrSpace = VT.getVectorNumElements(); + Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; + } else if (VT.isValid()) { + // Aggregates are no different from real scalars as far as GlobalISel is + // concerned. + Kind = Scalar; + SizeInBits = VT.getSizeInBits(); + ElementsOrAddrSpace = 1; + assert(SizeInBits != 0 && "invalid zero-sized type"); + } else { + Kind = Invalid; + SizeInBits = ElementsOrAddrSpace = 0; + } +} + +void LLT::print(raw_ostream &OS) const { + if (isVector()) + OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">"; + else if (isPointer()) + OS << "p" << getAddressSpace(); + else if (isValid()) { + assert(isScalar() && "unexpected type"); + OS << "s" << getScalarSizeInBits(); + } else + llvm_unreachable("trying to print an invalid type"); +} diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 6e3de52..1f1ce6e 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -173,14 +173,20 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, return C; } -static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { - if (C.peek() != 'i' || !isdigit(C.peek(1))) +static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) { + if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') || + !isdigit(C.peek(1))) return None; + char Kind = C.peek(); auto Range = C; - C.advance(); // Skip 'i' + C.advance(); // Skip 'i', 's', or 'p' while (isdigit(C.peek())) C.advance(); - Token.reset(MIToken::IntegerType, Range.upto(C)); + + Token.reset(Kind == 'i' + ? 
MIToken::IntegerType + : (Kind == 's' ? MIToken::ScalarType : MIToken::PointerType), + Range.upto(C)); return C; } @@ -199,12 +205,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("tied-def", MIToken::kw_tied_def) .Case("frame-setup", MIToken::kw_frame_setup) .Case("debug-location", MIToken::kw_debug_location) - .Case(".cfi_same_value", MIToken::kw_cfi_same_value) - .Case(".cfi_offset", MIToken::kw_cfi_offset) - .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) - .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) - .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) + .Case("same_value", MIToken::kw_cfi_same_value) + .Case("offset", MIToken::kw_cfi_offset) + .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case("def_cfa", MIToken::kw_cfi_def_cfa) .Case("blockaddress", MIToken::kw_blockaddress) + .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) .Case("half", MIToken::kw_half) .Case("float", MIToken::kw_float) @@ -215,6 +222,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("target-flags", MIToken::kw_target_flags) .Case("volatile", MIToken::kw_volatile) .Case("non-temporal", MIToken::kw_non_temporal) + .Case("dereferenceable", MIToken::kw_dereferenceable) .Case("invariant", MIToken::kw_invariant) .Case("align", MIToken::kw_align) .Case("stack", MIToken::kw_stack) @@ -227,11 +235,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("landing-pad", MIToken::kw_landing_pad) .Case("liveins", MIToken::kw_liveins) .Case("successors", MIToken::kw_successors) + .Case("floatpred", MIToken::kw_floatpred) + .Case("intpred", MIToken::kw_intpred) .Default(MIToken::Identifier); } static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { - if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') + if (!isalpha(C.peek()) && C.peek() != '_') return None; auto Range = C; while (isIdentifierChar(C.peek())) @@ -366,6 +376,11 @@ static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { return C; } +/// Returns true for a character allowed in a register name. +static bool isRegisterChar(char C) { + return isIdentifierChar(C) && C != '.'; +} + static Cursor maybeLexRegister(Cursor C, MIToken &Token) { if (C.peek() != '%') return None; @@ -373,7 +388,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) { return lexVirtualRegister(C, Token); auto Range = C; C.advance(); // Skip '%' - while (isIdentifierChar(C.peek())) + while (isRegisterChar(C.peek())) C.advance(); Token.reset(MIToken::NamedRegister, Range.upto(C)) .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' @@ -409,19 +424,6 @@ static bool isValidHexFloatingPointPrefix(char C) { return C == 'H' || C == 'K' || C == 'L' || C == 'M'; } -static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { - if (C.peek() != '0' || C.peek(1) != 'x') - return None; - Cursor Range = C; - C.advance(2); // Skip '0x' - if (isValidHexFloatingPointPrefix(C.peek())) - C.advance(); - while (isxdigit(C.peek())) - C.advance(); - Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); - return C; -} - static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { C.advance(); // Skip over [0-9]*([eE][-+]?[0-9]+)? 
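The new sN (scalar) and pN (pointer) tokens lexed above are the textual form of the LLT values introduced in LowLevelType.cpp earlier in this diff, and they are what the MIParser changes further down turn back into LLTs. As a hedged illustration only, not part of the patch, the LLT factory functions that this patch itself calls (LLT::scalar, LLT::pointer, LLT::vector) would be used roughly like this; printSomeLLTs is a hypothetical helper, and the declarations are assumed to live in llvm/CodeGen/LowLevelType.h:

  #include "llvm/CodeGen/LowLevelType.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void printSomeLLTs() {
    LLT S32 = LLT::scalar(32);       // LLT::print emits "s32"
    LLT P0 = LLT::pointer(0, 64);    // address space 0, 64 bits wide; emits "p0"
    LLT V4S32 = LLT::vector(4, 32);  // emits "<4 x s32>"
    S32.print(errs());
    P0.print(errs());
    V4S32.print(errs());
  }

These printed forms ("s32", "p0", "<4 x s32>") are exactly the spellings the new lexer and parseLowLevelType routine in this patch accept in MIR input.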
@@ -438,6 +440,28 @@ static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { return C; } +static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) + return None; + Cursor Range = C; + C.advance(2); + unsigned PrefLen = 2; + if (isValidHexFloatingPointPrefix(C.peek())) { + C.advance(); + PrefLen++; + } + while (isxdigit(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + if (StrVal.size() <= PrefLen) + return None; + if (PrefLen == 2) + Token.reset(MIToken::HexLiteral, Range.upto(C)); + else // It must be 3, which means that there was a floating-point prefix. + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) return None; @@ -485,6 +509,8 @@ static MIToken::TokenKind symbolToken(char C) { switch (C) { case ',': return MIToken::comma; + case '.': + return MIToken::dot; case '=': return MIToken::equal; case ':': @@ -566,7 +592,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return C.remaining(); } - if (Cursor R = maybeLexIntegerType(C, Token)) + if (Cursor R = maybeLexIntegerOrScalarType(C, Token)) return R.remaining(); if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); @@ -592,7 +618,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) return R.remaining(); - if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) + if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) return R.remaining(); if (Cursor R = maybeLexNumericalLiteral(C, Token)) return R.remaining(); diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h index 32fc8ab..edba749 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -38,6 +38,7 @@ struct MIToken { underscore, colon, coloncolon, + dot, exclaim, lparen, rparen, @@ -53,6 +54,7 @@ struct MIToken { kw_implicit_define, kw_def, kw_dead, + kw_dereferenceable, kw_killed, kw_undef, kw_internal, @@ -67,6 +69,7 @@ struct MIToken { kw_cfi_def_cfa_offset, kw_cfi_def_cfa, kw_blockaddress, + kw_intrinsic, kw_target_index, kw_half, kw_float, @@ -89,6 +92,8 @@ struct MIToken { kw_landing_pad, kw_liveins, kw_successors, + kw_floatpred, + kw_intpred, // Named metadata keywords md_tbaa, @@ -102,6 +107,8 @@ struct MIToken { NamedRegister, MachineBasicBlockLabel, MachineBasicBlock, + PointerType, + ScalarType, StackObject, FixedStackObject, NamedGlobalValue, @@ -111,6 +118,7 @@ struct MIToken { // Other tokens IntegerLiteral, FloatingPointLiteral, + HexLiteral, VirtualRegister, ConstantPoolItem, JumpTableIndex, @@ -160,7 +168,7 @@ public: bool isMemoryOperandFlag() const { return Kind == kw_volatile || Kind == kw_non_temporal || - Kind == kw_invariant; + Kind == kw_dereferenceable || Kind == kw_invariant; } bool is(TokenKind K) const { return Kind == K; } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp index b3fd16f..c8bed08 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -14,6 +14,7 @@ #include "MIParser.h" #include "MILexer.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" #include 
"llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -26,13 +27,16 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cctype> using namespace llvm; @@ -41,6 +45,17 @@ PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF, : MF(MF), SM(&SM), IRSlots(IRSlots) { } +VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) { + auto I = VRegInfos.insert(std::make_pair(Num, nullptr)); + if (I.second) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + VRegInfo *Info = new (Allocator) VRegInfo; + Info->VReg = MRI.createIncompleteVirtualRegister(); + I.first->second = Info; + } + return *I.first->second; +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -65,7 +80,7 @@ class MIParser { SMDiagnostic &Error; StringRef Source, CurrentSource; MIToken Token; - const PerFunctionMIParsingState &PFS; + PerFunctionMIParsingState &PFS; /// Maps from instruction names to op codes. StringMap<unsigned> Names2InstrOpCodes; /// Maps from register names to registers. @@ -86,7 +101,7 @@ class MIParser { StringMap<unsigned> Names2BitmaskTargetFlags; public: - MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, StringRef Source); /// \p SkipChar gives the number of characters to skip before looking @@ -109,7 +124,8 @@ public: bool parse(MachineInstr *&MI); bool parseStandaloneMBB(MachineBasicBlock *&MBB); bool parseStandaloneNamedRegister(unsigned &Reg); - bool parseStandaloneVirtualRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(VRegInfo *&Info); + bool parseStandaloneRegister(unsigned &Reg); bool parseStandaloneStackObject(int &FI); bool parseStandaloneMDNode(MDNode *&Node); @@ -119,21 +135,19 @@ public: bool parseBasicBlockLiveins(MachineBasicBlock &MBB); bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); - bool parseRegister(unsigned &Reg); + bool parseNamedRegister(unsigned &Reg); + bool parseVirtualRegister(VRegInfo *&Info); + bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo); bool parseRegisterFlag(unsigned &Flags); bool parseSubRegisterIndex(unsigned &SubReg); bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); - bool parseSize(unsigned &Size); bool parseRegisterOperand(MachineOperand &Dest, Optional<unsigned> &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); bool parseIRConstant(StringRef::iterator Loc, StringRef Source, const Constant *&C); bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); - bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read, - Type *&Ty); - // \p MustBeSized defines whether or not \p Ty must be sized. 
- bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true); + bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty); bool parseTypedImmediateOperand(MachineOperand &Dest); bool parseFPImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); @@ -155,6 +169,8 @@ public: bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseIntrinsicOperand(MachineOperand &Dest); + bool parsePredicateOperand(MachineOperand &Dest); bool parseTargetIndexOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); bool parseMachineOperand(MachineOperand &Dest, @@ -181,6 +197,12 @@ private: /// Return true if an error occurred. bool getUint64(uint64_t &Result); + /// Convert the hexadecimal literal in the current token into an unsigned + /// APInt with a minimum bitwidth required to represent the value. + /// + /// Return true if the literal does not represent an integer value. + bool getHexUint(APInt &Result); + /// If the current token is of the given kind, consume it and return false. /// Otherwise report an error and return true. bool expectAndConsume(MIToken::TokenKind TokenKind); @@ -254,7 +276,7 @@ private: } // end anonymous namespace -MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error, +MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, StringRef Source) : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS) {} @@ -362,7 +384,7 @@ bool MIParser::parseBasicBlockDefinition( if (!Name.empty()) { BB = dyn_cast_or_null<BasicBlock>( - MF.getFunction()->getValueSymbolTable().lookup(Name)); + MF.getFunction()->getValueSymbolTable()->lookup(Name)); if (!BB) return error(Loc, Twine("basic block '") + Name + "' is not defined in the function '" + @@ -437,10 +459,24 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); unsigned Reg = 0; - if (parseRegister(Reg)) + if (parseNamedRegister(Reg)) return true; - MBB.addLiveIn(Reg); lex(); + LaneBitmask Mask = LaneBitmask::getAll(); + if (consumeIfPresent(MIToken::colon)) { + // Parse lane mask. + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::HexLiteral)) + return error("expected a lane mask"); + static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned), + "Use correct get-function for lane mask"); + LaneBitmask::Type V; + if (getUnsigned(V)) + return error("invalid lane mask value"); + Mask = LaneBitmask(V); + lex(); + } + MBB.addLiveIn(Reg, Mask); } while (consumeIfPresent(MIToken::comma)); return false; } @@ -461,7 +497,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { lex(); unsigned Weight = 0; if (consumeIfPresent(MIToken::lparen)) { - if (Token.isNot(MIToken::IntegerLiteral)) + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::HexLiteral)) return error("expected an integer literal after '('"); if (getUnsigned(Weight)) return true; @@ -597,14 +634,6 @@ bool MIParser::parse(MachineInstr *&MI) { if (Token.isError() || parseInstruction(OpCode, Flags)) return true; - Type *Ty = nullptr; - if (isPreISelGenericOpcode(OpCode)) { - // For generic opcode, a type is mandatory. - auto Loc = Token.location(); - if (parseIRType(Loc, Ty)) - return true; - } - // Parse the remaining machine operands. 
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { @@ -660,8 +689,6 @@ bool MIParser::parse(MachineInstr *&MI) { // TODO: Check for extraneous machine operands. MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); MI->setFlags(Flags); - if (Ty) - MI->setType(Ty); for (const auto &Operand : Operands) MI->addOperand(MF, Operand.Operand); if (assignRegisterTies(*MI, Operands)) @@ -692,7 +719,7 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { lex(); if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - if (parseRegister(Reg)) + if (parseNamedRegister(Reg)) return true; lex(); if (Token.isNot(MIToken::Eof)) @@ -700,12 +727,28 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { return false; } -bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) { +bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) { lex(); if (Token.isNot(MIToken::VirtualRegister)) return error("expected a virtual register"); - if (parseRegister(Reg)) + if (parseVirtualRegister(Info)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister) && + Token.isNot(MIToken::VirtualRegister)) + return error("expected either a named or virtual register"); + + VRegInfo *Info; + if (parseRegister(Reg, Info)) return true; + lex(); if (Token.isNot(MIToken::Eof)) return error("expected end of string after the register reference"); @@ -800,33 +843,39 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { return false; } -bool MIParser::parseRegister(unsigned &Reg) { +bool MIParser::parseNamedRegister(unsigned &Reg) { + assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token"); + StringRef Name = Token.stringValue(); + if (getRegisterByName(Name, Reg)) + return error(Twine("unknown register name '") + Name + "'"); + return false; +} + +bool MIParser::parseVirtualRegister(VRegInfo *&Info) { + assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token"); + unsigned ID; + if (getUnsigned(ID)) + return true; + Info = &PFS.getVRegInfo(ID); + return false; +} + +bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) { switch (Token.kind()) { case MIToken::underscore: Reg = 0; - break; - case MIToken::NamedRegister: { - StringRef Name = Token.stringValue(); - if (getRegisterByName(Name, Reg)) - return error(Twine("unknown register name '") + Name + "'"); - break; - } - case MIToken::VirtualRegister: { - unsigned ID; - if (getUnsigned(ID)) + return false; + case MIToken::NamedRegister: + return parseNamedRegister(Reg); + case MIToken::VirtualRegister: + if (parseVirtualRegister(Info)) return true; - const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); - if (RegInfo == PFS.VirtualRegisterSlots.end()) - return error(Twine("use of undefined virtual register '%") + Twine(ID) + - "'"); - Reg = RegInfo->second; - break; - } + Reg = Info->VReg; + return false; // TODO: Parse other register kinds. 
default: llvm_unreachable("The current token should be a register"); } - return false; } bool MIParser::parseRegisterFlag(unsigned &Flags) { @@ -871,10 +920,10 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) { } bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { - assert(Token.is(MIToken::colon)); + assert(Token.is(MIToken::dot)); lex(); if (Token.isNot(MIToken::Identifier)) - return error("expected a subregister index after ':'"); + return error("expected a subregister index after '.'"); auto Name = Token.stringValue(); SubReg = getSubRegIndex(Name); if (!SubReg) @@ -885,7 +934,7 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { if (!consumeIfPresent(MIToken::kw_tied_def)) - return error("expected 'tied-def' after '('"); + return true; if (Token.isNot(MIToken::IntegerLiteral)) return error("expected an integer literal after 'tied-def'"); if (getUnsigned(TiedDefIdx)) @@ -896,17 +945,6 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { return false; } -bool MIParser::parseSize(unsigned &Size) { - if (Token.isNot(MIToken::IntegerLiteral)) - return error("expected an integer literal for the size"); - if (getUnsigned(Size)) - return true; - lex(); - if (expectAndConsume(MIToken::rparen)) - return true; - return false; -} - bool MIParser::assignRegisterTies(MachineInstr &MI, ArrayRef<ParsedMachineOperand> Operands) { SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs; @@ -947,7 +985,6 @@ bool MIParser::assignRegisterTies(MachineInstr &MI, bool MIParser::parseRegisterOperand(MachineOperand &Dest, Optional<unsigned> &TiedDefIdx, bool IsDef) { - unsigned Reg; unsigned Flags = IsDef ? RegState::Define : 0; while (Token.isRegisterFlag()) { if (parseRegisterFlag(Flags)) @@ -955,38 +992,62 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, } if (!Token.isRegister()) return error("expected a register after register flags"); - if (parseRegister(Reg)) + unsigned Reg; + VRegInfo *RegInfo; + if (parseRegister(Reg, RegInfo)) return true; lex(); unsigned SubReg = 0; - if (Token.is(MIToken::colon)) { + if (Token.is(MIToken::dot)) { if (parseSubRegisterIndex(SubReg)) return true; if (!TargetRegisterInfo::isVirtualRegister(Reg)) return error("subregister index expects a virtual register"); } + MachineRegisterInfo &MRI = MF.getRegInfo(); if ((Flags & RegState::Define) == 0) { if (consumeIfPresent(MIToken::lparen)) { unsigned Idx; - if (parseRegisterTiedDefIndex(Idx)) - return true; - TiedDefIdx = Idx; + if (!parseRegisterTiedDefIndex(Idx)) + TiedDefIdx = Idx; + else { + // Try a redundant low-level type. + LLT Ty; + if (parseLowLevelType(Token.location(), Ty)) + return error("expected tied-def or low-level type after '('"); + + if (expectAndConsume(MIToken::rparen)) + return true; + + if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) + return error("inconsistent type for generic virtual register"); + + MRI.setType(Reg, Ty); + } } } else if (consumeIfPresent(MIToken::lparen)) { - // Virtual registers may have a size with GlobalISel. + // Virtual registers may have a type with GlobalISel. 
if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return error("unexpected size on physical register"); - unsigned Size; - if (parseSize(Size)) + return error("unexpected type on physical register"); + + LLT Ty; + if (parseLowLevelType(Token.location(), Ty)) return true; - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.setSize(Reg, Size); - } else if (PFS.GenericVRegs.count(Reg)) { - // Generic virtual registers must have a size. - // If we end up here this means the size hasn't been specified and + if (expectAndConsume(MIToken::rparen)) + return true; + + if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) + return error("inconsistent type for generic virtual register"); + + MRI.setType(Reg, Ty); + } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Generic virtual registers must have a type. + // If we end up here this means the type hasn't been specified and // this is bad! - return error("generic virtual registers must have a size"); + if (RegInfo->Kind == VRegInfo::GENERIC || + RegInfo->Kind == VRegInfo::REGBANK) + return error("generic virtual registers must have a type"); } Dest = MachineOperand::CreateReg( Reg, Flags & RegState::Define, Flags & RegState::Implicit, @@ -1010,7 +1071,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, const Constant *&C) { auto Source = StringValue.str(); // The source has to be null terminated. SMDiagnostic Err; - C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(), + C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(), &PFS.IRSlots); if (!C) return error(Loc + Err.getColumnNo(), Err.getMessage()); @@ -1024,35 +1085,45 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { return false; } -bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue, - unsigned &Read, Type *&Ty) { - auto Source = StringValue.str(); // The source has to be null terminated. - SMDiagnostic Err; - Ty = parseTypeAtBeginning(Source.c_str(), Read, Err, - *MF.getFunction()->getParent(), &PFS.IRSlots); - if (!Ty) - return error(Loc + Err.getColumnNo(), Err.getMessage()); - return false; -} +bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { + if (Token.is(MIToken::ScalarType)) { + Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); + lex(); + return false; + } else if (Token.is(MIToken::PointerType)) { + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + lex(); + return false; + } -bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty, - bool MustBeSized) { - // At this point we enter in the IR world, i.e., to get the correct type, - // we need to hand off the whole string, not just the current token. - // E.g., <4 x i64> would give '<' as a token and there is not much - // the IR parser can do with that. - unsigned Read = 0; - if (parseIRType(Loc, StringRef(Loc), Read, Ty)) - return true; - // The type must be sized, otherwise there is not much the backend - // can do with it. - if (MustBeSized && !Ty->isSized()) - return error("expected a sized type"); - // The next token is Read characters from the Loc. - // However, the current location is not Loc, but Loc + the length of Token. - // Therefore, subtract the length of Token (range().end() - Loc) to the - // number of characters to skip before the next token. 
- lex(Read - (Token.range().end() - Loc)); + // Now we're looking for a vector. + if (Token.isNot(MIToken::less)) + return error(Loc, + "expected unsized, pN, sN or <N x sM> for GlobalISel type"); + + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error(Loc, "expected <N x sM> for vector type"); + uint64_t NumElements = Token.integerValue().getZExtValue(); + lex(); + + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x") + return error(Loc, "expected '<N x sM>' for vector type"); + lex(); + + if (Token.isNot(MIToken::ScalarType)) + return error(Loc, "expected '<N x sM>' for vector type"); + uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + lex(); + + if (Token.isNot(MIToken::greater)) + return error(Loc, "expected '<N x sM>' for vector type"); + lex(); + + Ty = LLT::vector(NumElements, ScalarSize); return false; } @@ -1072,7 +1143,8 @@ bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { auto Loc = Token.location(); lex(); - if (Token.isNot(MIToken::FloatingPointLiteral)) + if (Token.isNot(MIToken::FloatingPointLiteral) && + Token.isNot(MIToken::HexLiteral)) return error("expected a floating point literal"); const Constant *C = nullptr; if (parseIRConstant(Loc, C)) @@ -1082,13 +1154,24 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { } bool MIParser::getUnsigned(unsigned &Result) { - assert(Token.hasIntegerValue() && "Expected a token with an integer value"); - const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; - uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); - if (Val64 == Limit) - return error("expected 32-bit integer (too large)"); - Result = Val64; - return false; + if (Token.hasIntegerValue()) { + const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; + uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); + if (Val64 == Limit) + return error("expected 32-bit integer (too large)"); + Result = Val64; + return false; + } + if (Token.is(MIToken::HexLiteral)) { + APInt A; + if (getHexUint(A)) + return true; + if (A.getBitWidth() > 32) + return error("expected 32-bit integer (too large)"); + Result = A.getZExtValue(); + return false; + } + return true; } bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { @@ -1128,7 +1211,7 @@ bool MIParser::parseStackFrameIndex(int &FI) { "'"); StringRef Name; if (const auto *Alloca = - MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second)) + MF.getFrameInfo().getObjectAllocation(ObjectInfo->second)) Name = Alloca->getName(); if (!Token.stringValue().empty() && Token.stringValue() != Name) return error(Twine("the name of the stack object '%stack.") + Twine(ID) + @@ -1293,7 +1376,7 @@ bool MIParser::parseCFIRegister(unsigned &Reg) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a cfi register"); unsigned LLVMReg; - if (parseRegister(LLVMReg)) + if (parseNamedRegister(LLVMReg)) return true; const auto *TRI = MF.getSubtarget().getRegisterInfo(); assert(TRI && "Expected target register info"); @@ -1308,7 +1391,6 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { auto Kind = Token.kind(); lex(); - auto &MMI = MF.getMMI(); int Offset; unsigned Reg; unsigned CFIIndex; @@ -1316,27 +1398,26 @@ case MIToken::kw_cfi_same_value: if (parseCFIRegister(Reg)) return true; - CFIIndex = - 
MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); break; case MIToken::kw_cfi_offset: if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || parseCFIOffset(Offset)) return true; CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); break; case MIToken::kw_cfi_def_cfa_register: if (parseCFIRegister(Reg)) return true; CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); break; case MIToken::kw_cfi_def_cfa_offset: if (parseCFIOffset(Offset)) return true; // NB: MCCFIInstruction::createDefCfaOffset negates the offset. - CFIIndex = MMI.addFrameInst( + CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); break; case MIToken::kw_cfi_def_cfa: @@ -1345,7 +1426,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { return true; // NB: MCCFIInstruction::createDefCfa negates the offset. CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); + MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); break; default: // TODO: Parse the other CFI operands. @@ -1359,7 +1440,7 @@ bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) { switch (Token.kind()) { case MIToken::NamedIRBlock: { BB = dyn_cast_or_null<BasicBlock>( - F.getValueSymbolTable().lookup(Token.stringValue())); + F.getValueSymbolTable()->lookup(Token.stringValue())); if (!BB) return error(Twine("use of undefined IR block '") + Token.range() + "'"); break; @@ -1411,6 +1492,93 @@ bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_intrinsic)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax intrinsic(@llvm.whatever)"); + + if (Token.isNot(MIToken::NamedGlobalValue)) + return error("expected syntax intrinsic(@llvm.whatever)"); + + std::string Name = Token.stringValue(); + lex(); + + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' to terminate intrinsic name"); + + // Find out what intrinsic we're dealing with, first try the global namespace + // and then the target's private intrinsics if that fails. 
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); + Intrinsic::ID ID = Function::lookupIntrinsicID(Name); + if (ID == Intrinsic::not_intrinsic && TII) + ID = static_cast<Intrinsic::ID>(TII->lookupName(Name)); + + if (ID == Intrinsic::not_intrinsic) + return error("unknown intrinsic name"); + Dest = MachineOperand::CreateIntrinsicID(ID); + + return false; +} + +bool MIParser::parsePredicateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_intpred) || Token.is(MIToken::kw_floatpred)); + bool IsFloat = Token.is(MIToken::kw_floatpred); + lex(); + + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax intpred(whatever) or floatpred(whatever)"); + + if (Token.isNot(MIToken::Identifier)) + return error("expected a predicate name"); + + CmpInst::Predicate Pred; + if (IsFloat) { + Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue()) + .Case("false", CmpInst::FCMP_FALSE) + .Case("oeq", CmpInst::FCMP_OEQ) + .Case("ogt", CmpInst::FCMP_OGT) + .Case("oge", CmpInst::FCMP_OGE) + .Case("olt", CmpInst::FCMP_OLT) + .Case("ole", CmpInst::FCMP_OLE) + .Case("one", CmpInst::FCMP_ONE) + .Case("ord", CmpInst::FCMP_ORD) + .Case("uno", CmpInst::FCMP_UNO) + .Case("ueq", CmpInst::FCMP_UEQ) + .Case("ugt", CmpInst::FCMP_UGT) + .Case("uge", CmpInst::FCMP_UGE) + .Case("ult", CmpInst::FCMP_ULT) + .Case("ule", CmpInst::FCMP_ULE) + .Case("une", CmpInst::FCMP_UNE) + .Case("true", CmpInst::FCMP_TRUE) + .Default(CmpInst::BAD_FCMP_PREDICATE); + if (!CmpInst::isFPPredicate(Pred)) + return error("invalid floating-point predicate"); + } else { + Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue()) + .Case("eq", CmpInst::ICMP_EQ) + .Case("ne", CmpInst::ICMP_NE) + .Case("sgt", CmpInst::ICMP_SGT) + .Case("sge", CmpInst::ICMP_SGE) + .Case("slt", CmpInst::ICMP_SLT) + .Case("sle", CmpInst::ICMP_SLE) + .Case("ugt", CmpInst::ICMP_UGT) + .Case("uge", CmpInst::ICMP_UGE) + .Case("ult", CmpInst::ICMP_ULT) + .Case("ule", CmpInst::ICMP_ULE) + .Default(CmpInst::BAD_ICMP_PREDICATE); + if (!CmpInst::isIntPredicate(Pred)) + return error("invalid integer predicate"); + } + + lex(); + Dest = MachineOperand::CreatePredicate(Pred); + if (expectAndConsume(MIToken::rparen)) + return error("predicate should be terminated by ')'."); + + return false; +} + bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_target_index)); lex(); @@ -1441,8 +1609,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { while (true) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - unsigned Reg = 0; - if (parseRegister(Reg)) + unsigned Reg; + if (parseNamedRegister(Reg)) return true; lex(); Mask[Reg / 32] |= 1U << (Reg % 32); @@ -1511,10 +1679,15 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseCFIOperand(Dest); case MIToken::kw_blockaddress: return parseBlockAddressOperand(Dest); + case MIToken::kw_intrinsic: + return parseIntrinsicOperand(Dest); case MIToken::kw_target_index: return parseTargetIndexOperand(Dest); case MIToken::kw_liveout: return parseLiveoutRegisterMaskOperand(Dest); + case MIToken::kw_floatpred: + case MIToken::kw_intpred: + return parsePredicateOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: @@ -1523,7 +1696,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, lex(); break; } - // fallthrough + LLVM_FALLTHROUGH; default: // FIXME: Parse the MCSymbol machine operand. 
return error("expected a machine operand"); @@ -1613,7 +1786,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { bool MIParser::parseIRValue(const Value *&V) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue()); + V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { @@ -1647,10 +1820,35 @@ bool MIParser::parseIRValue(const Value *&V) { } bool MIParser::getUint64(uint64_t &Result) { - assert(Token.hasIntegerValue()); - if (Token.integerValue().getActiveBits() > 64) - return error("expected 64-bit integer (too large)"); - Result = Token.integerValue().getZExtValue(); + if (Token.hasIntegerValue()) { + if (Token.integerValue().getActiveBits() > 64) + return error("expected 64-bit integer (too large)"); + Result = Token.integerValue().getZExtValue(); + return false; + } + if (Token.is(MIToken::HexLiteral)) { + APInt A; + if (getHexUint(A)) + return true; + if (A.getBitWidth() > 64) + return error("expected 64-bit integer (too large)"); + Result = A.getZExtValue(); + return false; + } + return true; +} + +bool MIParser::getHexUint(APInt &Result) { + assert(Token.is(MIToken::HexLiteral)); + StringRef S = Token.range(); + assert(S[0] == '0' && tolower(S[1]) == 'x'); + // This could be a floating point literal with a special prefix. + if (!isxdigit(S[2])) + return true; + StringRef V = S.substr(2); + APInt A(V.size()*4, V, 16); + Result = APInt(A.getActiveBits(), + ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); return false; } @@ -1663,6 +1861,9 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { case MIToken::kw_non_temporal: Flags |= MachineMemOperand::MONonTemporal; break; + case MIToken::kw_dereferenceable: + Flags |= MachineMemOperand::MODereferenceable; + break; case MIToken::kw_invariant: Flags |= MachineMemOperand::MOInvariant; break; @@ -2059,36 +2260,42 @@ bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots); } -bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS, +bool llvm::parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseBasicBlocks(); } -bool llvm::parseMBBReference(const PerFunctionMIParsingState &PFS, +bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB); } -bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS, +bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg); +} + +bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg); } -bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, +bool llvm::parseVirtualRegisterReference(PerFunctionMIParsingState &PFS, + VRegInfo *&Info, StringRef Src, SMDiagnostic &Error) { - return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg); + return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Info); } -bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS, +bool 
llvm::parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI); } -bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS, +bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h index 18895b9..93a4d84 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -26,26 +26,42 @@ class MachineFunction; class MachineInstr; class MachineRegisterInfo; class MDNode; +class RegisterBank; struct SlotMapping; class SMDiagnostic; class SourceMgr; +class TargetRegisterClass; + +struct VRegInfo { + enum uint8_t { + UNKNOWN, NORMAL, GENERIC, REGBANK + } Kind = UNKNOWN; + bool Explicit = false; ///< VReg was explicitly specified in the .mir file. + union { + const TargetRegisterClass *RC; + const RegisterBank *RegBank; + } D; + unsigned VReg; + unsigned PreferredReg = 0; +}; struct PerFunctionMIParsingState { + BumpPtrAllocator Allocator; MachineFunction &MF; SourceMgr *SM; const SlotMapping &IRSlots; DenseMap<unsigned, MachineBasicBlock *> MBBSlots; - DenseMap<unsigned, unsigned> VirtualRegisterSlots; + DenseMap<unsigned, VRegInfo*> VRegInfos; DenseMap<unsigned, int> FixedStackObjectSlots; DenseMap<unsigned, int> StackObjectSlots; DenseMap<unsigned, unsigned> ConstantPoolSlots; DenseMap<unsigned, unsigned> JumpTableSlots; - /// Hold the generic virtual registers. - SmallSet<unsigned, 8> GenericVRegs; PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM, const SlotMapping &IRSlots); + + VRegInfo &getVRegInfo(unsigned VReg); }; /// Parse the machine basic block definitions, and skip the machine @@ -73,26 +89,29 @@ bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, /// on the given source string. /// /// Return true if an error occurred. 
-bool parseMachineInstructions(const PerFunctionMIParsingState &PFS, - StringRef Src, SMDiagnostic &Error); +bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src, + SMDiagnostic &Error); -bool parseMBBReference(const PerFunctionMIParsingState &PFS, +bool parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, StringRef Src, SMDiagnostic &Error); -bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, - SMDiagnostic &Error); +bool parseRegisterReference(PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, + SMDiagnostic &Error); + +bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, + StringRef Src, SMDiagnostic &Error); -bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, +bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS, + VRegInfo *&Info, StringRef Src, SMDiagnostic &Error); -bool parseStackObjectReference(const PerFunctionMIParsingState &PFS, - int &FI, StringRef Src, SMDiagnostic &Error); +bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI, + StringRef Src, SMDiagnostic &Error); -bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node, - StringRef Src, SMDiagnostic &Error); +bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, + SMDiagnostic &Error); } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 4aa3df6..3dff114 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -102,10 +102,10 @@ public: /// Return true if error occurred. bool initializeMachineFunction(MachineFunction &MF); - bool initializeRegisterInfo(PerFunctionMIParsingState &PFS, - const yaml::MachineFunction &YamlMF); + bool parseRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); - void inferRegisterInfo(const PerFunctionMIParsingState &PFS, + bool setupRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); bool initializeFrameInfo(PerFunctionMIParsingState &PFS, @@ -128,10 +128,10 @@ public: const yaml::MachineJumpTable &YamlJTI); private: - bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node, + bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source); - bool parseMBBReference(const PerFunctionMIParsingState &PFS, + bool parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, const yaml::StringValue &Source); @@ -160,6 +160,8 @@ private: /// /// Return null if the name isn't a register bank. 
const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name); + + void computeFunctionProperties(MachineFunction &MF); }; } // end namespace llvm @@ -255,7 +257,8 @@ std::unique_ptr<Module> MIRParserImpl::parse() { bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR) { auto MF = llvm::make_unique<yaml::MachineFunction>(); - yaml::yamlize(In, *MF, false); + yaml::EmptyContext Ctx; + yaml::yamlize(In, *MF, false, Ctx); if (In.error()) return true; auto FunctionName = MF->Name; @@ -279,6 +282,43 @@ void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { new UnreachableInst(Context, BB); } +static bool isSSA(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) + return false; + } + return true; +} + +void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { + MachineFunctionProperties &Properties = MF.getProperties(); + + bool HasPHI = false; + bool HasInlineAsm = false; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (MI.isPHI()) + HasPHI = true; + if (MI.isInlineAsm()) + HasInlineAsm = true; + } + } + if (!HasPHI) + Properties.set(MachineFunctionProperties::Property::NoPHIs); + MF.setHasInlineAsm(HasInlineAsm); + + if (isSSA(MF)) + Properties.set(MachineFunctionProperties::Property::IsSSA); + else + Properties.reset(MachineFunctionProperties::Property::IsSSA); + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.getNumVirtRegs() == 0) + Properties.set(MachineFunctionProperties::Property::NoVRegs); +} + bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { auto It = Functions.find(MF.getName()); if (It == Functions.end()) @@ -289,11 +329,17 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); - MF.setHasInlineAsm(YamlMF.HasInlineAsm); - if (YamlMF.AllVRegsAllocated) - MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated); + + if (YamlMF.Legalized) + MF.getProperties().set(MachineFunctionProperties::Property::Legalized); + if (YamlMF.RegBankSelected) + MF.getProperties().set( + MachineFunctionProperties::Property::RegBankSelected); + if (YamlMF.Selected) + MF.getProperties().set(MachineFunctionProperties::Property::Selected); + PerFunctionMIParsingState PFS(MF, SM, IRSlots); - if (initializeRegisterInfo(PFS, YamlMF)) + if (parseRegisterInfo(PFS, YamlMF)) return true; if (!YamlMF.Constants.empty()) { auto *ConstantPool = MF.getConstantPool(); @@ -343,62 +389,60 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { } PFS.SM = &SM; - inferRegisterInfo(PFS, YamlMF); - // FIXME: This is a temporary workaround until the reserved registers can be - // serialized. 
- MF.getRegInfo().freezeReservedRegs(MF); + if (setupRegisterInfo(PFS, YamlMF)) + return true; + + computeFunctionProperties(MF); + MF.verify(); return false; } -bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, - const yaml::MachineFunction &YamlMF) { +bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; MachineRegisterInfo &RegInfo = MF.getRegInfo(); - assert(RegInfo.isSSA()); - if (!YamlMF.IsSSA) - RegInfo.leaveSSA(); assert(RegInfo.tracksLiveness()); if (!YamlMF.TracksRegLiveness) RegInfo.invalidateLiveness(); - RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); SMDiagnostic Error; // Parse the virtual register information. for (const auto &VReg : YamlMF.VirtualRegisters) { - unsigned Reg; + VRegInfo &Info = PFS.getVRegInfo(VReg.ID.Value); + if (Info.Explicit) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + Info.Explicit = true; + if (StringRef(VReg.Class.Value).equals("_")) { - // This is a generic virtual register. - // The size will be set appropriately when we reach the definition. - Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1); - PFS.GenericVRegs.insert(Reg); + Info.Kind = VRegInfo::GENERIC; } else { const auto *RC = getRegClass(MF, VReg.Class.Value); if (RC) { - Reg = RegInfo.createVirtualRegister(RC); + Info.Kind = VRegInfo::NORMAL; + Info.D.RC = RC; } else { - const auto *RegBank = getRegBank(MF, VReg.Class.Value); + const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value); if (!RegBank) return error( VReg.Class.SourceRange.Start, Twine("use of undefined register class or register bank '") + VReg.Class.Value + "'"); - Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1); - RegInfo.setRegBank(Reg, *RegBank); - PFS.GenericVRegs.insert(Reg); + Info.Kind = VRegInfo::REGBANK; + Info.D.RegBank = RegBank; } } - if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) - .second) - return error(VReg.ID.SourceRange.Start, - Twine("redefinition of virtual register '%") + - Twine(VReg.ID.Value) + "'"); + if (!VReg.PreferredRegister.Value.empty()) { - unsigned PreferredReg = 0; - if (parseNamedRegisterReference(PFS, PreferredReg, - VReg.PreferredRegister.Value, Error)) + if (Info.Kind != VRegInfo::NORMAL) + return error(VReg.Class.SourceRange.Start, + Twine("preferred register can only be set for normal vregs")); + + if (parseRegisterReference(PFS, Info.PreferredReg, + VReg.PreferredRegister.Value, Error)) return error(Error, VReg.PreferredRegister.SourceRange); - RegInfo.setSimpleHint(Reg, PreferredReg); } } @@ -409,9 +453,11 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, return error(Error, LiveIn.Register.SourceRange); unsigned VReg = 0; if (!LiveIn.VirtualRegister.Value.empty()) { - if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value, + VRegInfo *Info; + if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value, Error)) return error(Error, LiveIn.VirtualRegister.SourceRange); + VReg = Info->VReg; } RegInfo.addLiveIn(Reg, VReg); } @@ -430,26 +476,57 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, return false; } -void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS, +bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { - if (YamlMF.CalleeSavedRegisters) - return; - 
MachineRegisterInfo &MRI = PFS.MF.getRegInfo(); - for (const MachineBasicBlock &MBB : PFS.MF) { - for (const MachineInstr &MI : MBB) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isRegMask()) - continue; - MRI.addPhysRegsUsedFromRegMask(MO.getRegMask()); + MachineFunction &MF = PFS.MF; + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool Error = false; + // Create VRegs + for (auto P : PFS.VRegInfos) { + const VRegInfo &Info = *P.second; + unsigned Reg = Info.VReg; + switch (Info.Kind) { + case VRegInfo::UNKNOWN: + error(Twine("Cannot determine class/bank of virtual register ") + + Twine(P.first) + " in function '" + MF.getName() + "'"); + Error = true; + break; + case VRegInfo::NORMAL: + MRI.setRegClass(Reg, Info.D.RC); + if (Info.PreferredReg != 0) + MRI.setSimpleHint(Reg, Info.PreferredReg); + break; + case VRegInfo::GENERIC: + break; + case VRegInfo::REGBANK: + MRI.setRegBank(Reg, *Info.D.RegBank); + break; + } + } + + // Compute MachineRegisterInfo::UsedPhysRegMask + if (!YamlMF.CalleeSavedRegisters) { + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MRI.addPhysRegsUsedFromRegMask(MO.getRegMask()); + } } } } + + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MRI.freezeReservedRegs(MF); + return Error; } bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); @@ -507,7 +584,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::StringValue &Name = Object.Name; if (!Name.Value.empty()) { Alloca = dyn_cast_or_null<AllocaInst>( - F.getValueSymbolTable().lookup(Name.Value)); + F.getValueSymbolTable()->lookup(Name.Value)); if (!Alloca) return error(Name.SourceRange.Start, "alloca instruction named '" + Name.Value + @@ -597,11 +674,11 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) return true; - PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); return false; } -bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS, +bool MIRParserImpl::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source) { if (Source.Value.empty()) return false; @@ -657,7 +734,7 @@ bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS, return false; } -bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS, +bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, const yaml::StringValue &Source) { SMDiagnostic Error; @@ -784,6 +861,14 @@ std::unique_ptr<MIRParser> llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context) { auto Filename = Contents->getBufferIdentifier(); + if (Context.shouldDiscardValueNames()) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, + SMDiagnostic( + Filename, SourceMgr::DK_Error, + "Can't read MIR with a Context that 
discards named Values"))); + return nullptr; + } return llvm::make_unique<MIRParser>( llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); } diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index 703c99d..db87092 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -14,6 +14,7 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -27,13 +28,16 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -86,10 +90,8 @@ public: const MachineConstantPool &ConstantPool); void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI, const MachineJumpTableInfo &JTI); - void convertStackObjects(yaml::MachineFunction &MF, - const MachineFrameInfo &MFI, MachineModuleInfo &MMI, - ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI); + void convertStackObjects(yaml::MachineFunction &YMF, + const MachineFunction &MF, ModuleSlotTracker &MST); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -121,7 +123,7 @@ public: void printTargetFlags(const MachineOperand &Op); void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, unsigned I, bool ShouldPrintRegisterTies, - const MachineRegisterInfo *MRI = nullptr, bool IsDef = false); + LLT TypeToPrint, bool IsDef = false); void print(const MachineMemOperand &Op); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); @@ -172,16 +174,19 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Name = MF.getName(); YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); - YamlMF.HasInlineAsm = MF.hasInlineAsm(); - YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::AllVRegsAllocated); + + YamlMF.Legalized = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Legalized); + YamlMF.RegBankSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::RegBankSelected); + YamlMF.Selected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); ModuleSlotTracker MST(MF.getFunction()->getParent()); MST.incorporateFunction(*MF.getFunction()); - convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo()); - convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST, - MF.getSubtarget().getRegisterInfo()); + convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); + convertStackObjects(YamlMF, MF, MST); if (const auto *ConstantPool = MF.getConstantPool()) convert(YamlMF, *ConstantPool); if (const auto *JumpTableInfo = MF.getJumpTableInfo()) @@ -203,9 +208,7 @@ void MIRPrinter::print(const MachineFunction &MF) { void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { - MF.IsSSA = 
RegInfo.isSSA(); MF.TracksRegLiveness = RegInfo.tracksLiveness(); - MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled(); // Print the virtual register definitions. for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { @@ -219,7 +222,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); else { VReg.Class = std::string("_"); - assert(RegInfo.getSize(Reg) && "Generic registers must have a size"); + assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && + "Generic registers must have a valid type"); } unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) @@ -279,11 +283,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, } } -void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, - const MachineFrameInfo &MFI, - MachineModuleInfo &MMI, - ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI) { +void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, + const MachineFunction &MF, + ModuleSlotTracker &MST) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); // Process fixed stack objects. unsigned ID = 0; for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { @@ -300,7 +304,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, YamlObject.Alignment = MFI.getObjectAlignment(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); - MF.FixedStackObjects.push_back(YamlObject); + YMF.FixedStackObjects.push_back(YamlObject); StackObjectOperandMapping.insert( std::make_pair(I, FrameIndexOperand::createFixed(ID++))); } @@ -325,7 +329,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, YamlObject.Size = MFI.getObjectSize(I); YamlObject.Alignment = MFI.getObjectAlignment(I); - MF.StackObjects.push_back(YamlObject); + YMF.StackObjects.push_back(YamlObject); StackObjectOperandMapping.insert(std::make_pair( I, FrameIndexOperand::create(YamlObject.Name.Value, ID++))); } @@ -338,9 +342,9 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; if (StackObject.IsFixed) - MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; else - MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; } for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { auto LocalObject = MFI.getLocalFrameObjectMap(I); @@ -349,26 +353,26 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; assert(!StackObject.IsFixed && "Expected a locally mapped stack object"); - MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; + YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; } // Print the stack object references in the frame information class after // converting the stack objects. if (MFI.hasStackProtectorIndex()) { - raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value); + raw_string_ostream StrOS(YMF.FrameInfo.StackProtector.Value); MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) .printStackObjectReference(MFI.getStackProtectorIndex()); } // Print the debug variable information. 
- for (MachineModuleInfo::VariableDbgInfo &DebugVar : - MMI.getVariableDbgInfo()) { + for (const MachineFunction::VariableDbgInfo &DebugVar : + MF.getVariableDbgInfo()) { auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot); assert(StackObjectInfo != StackObjectOperandMapping.end() && "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; assert(!StackObject.IsFixed && "Expected a non-fixed stack object"); - auto &Object = MF.StackObjects[StackObject.ID]; + auto &Object = YMF.StackObjects[StackObject.ID]; { raw_string_ostream StrOS(Object.DebugVar.Value); DebugVar.Var->printAsOperand(StrOS, MST); @@ -475,25 +479,27 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS << ", "; printMBBReference(**I); if (MBB.hasSuccessorProbabilities()) - OS << '(' << MBB.getSuccProbability(I) << ')'; + OS << '(' + << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator()) + << ')'; } OS << "\n"; HasLineAttributes = true; } // Print the live in registers. - const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); - if (!MBB.livein_empty()) { + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + if (MRI.tracksLiveness() && !MBB.livein_empty()) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); OS.indent(2) << "liveins: "; bool First = true; for (const auto &LI : MBB.liveins()) { if (!First) OS << ", "; First = false; - printReg(LI.PhysReg, OS, TRI); - if (LI.LaneMask != ~0u) - OS << ':' << PrintLaneMask(LI.LaneMask); + printReg(LI.PhysReg, OS, &TRI); + if (!LI.LaneMask.all()) + OS << ":0x" << PrintLaneMask(LI.LaneMask); } OS << "\n"; HasLineAttributes = true; @@ -537,6 +543,27 @@ static bool hasComplexRegisterTies(const MachineInstr &MI) { return false; } +static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx, + SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) { + const MachineOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) + return LLT{}; + + if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands()) + return MRI.getType(Op.getReg()); + + auto &OpInfo = MI.getDesc().OpInfo[OpIdx]; + if (!OpInfo.isGenericType()) + return MRI.getType(Op.getReg()); + + if (PrintedTypes[OpInfo.getGenericTypeIndex()]) + return LLT{}; + + PrintedTypes.set(OpInfo.getGenericTypeIndex()); + return MRI.getType(Op.getReg()); +} + void MIPrinter::print(const MachineInstr &MI) { const auto *MF = MI.getParent()->getParent(); const auto &MRI = MF->getRegInfo(); @@ -548,6 +575,7 @@ void MIPrinter::print(const MachineInstr &MI) { if (MI.isCFIInstruction()) assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); + SmallBitVector PrintedTypes(8); bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI); unsigned I = 0, E = MI.getNumOperands(); for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && @@ -555,7 +583,8 @@ void MIPrinter::print(const MachineInstr &MI) { ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI, + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + getTypeToPrint(MI, I, PrintedTypes, MRI), /*IsDef=*/true); } @@ -564,11 +593,6 @@ void MIPrinter::print(const MachineInstr &MI) { if (MI.getFlag(MachineInstr::FrameSetup)) OS << "frame-setup "; OS << TII->getName(MI.getOpcode()); - if (isPreISelGenericOpcode(MI.getOpcode())) { - assert(MI.getType() && "Generic instructions must have a type"); - OS << ' '; - MI.getType()->print(OS, /*IsForDebug*/ 
false, /*NoDetails*/ true); - } if (I < E) OS << ' '; @@ -576,7 +600,8 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies); + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + getTypeToPrint(MI, I, PrintedTypes, MRI)); NeedComma = true; } @@ -748,8 +773,8 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { } void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, - const MachineRegisterInfo *MRI, bool IsDef) { + unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, + bool IsDef) { printTargetFlags(Op); switch (Op.getType()) { case MachineOperand::MO_Register: @@ -773,12 +798,11 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, printReg(Op.getReg(), OS, TRI); // Print the sub register. if (Op.getSubReg() != 0) - OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); + OS << '.' << TRI->getSubRegIndexName(Op.getSubReg()); if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; - assert((!IsDef || MRI) && "for IsDef, MRI must be provided"); - if (IsDef && MRI->getSize(Op.getReg())) - OS << '(' << MRI->getSize(Op.getReg()) << ')'; + if (TypeToPrint.isValid()) + OS << '(' << TypeToPrint << ')'; break; case MachineOperand::MO_Immediate: OS << Op.getImm(); @@ -861,8 +885,25 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << "<mcsymbol " << *Op.getMCSymbol() << ">"; break; case MachineOperand::MO_CFIIndex: { - const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI(); - print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI); + const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI); + break; + } + case MachineOperand::MO_IntrinsicID: { + Intrinsic::ID ID = Op.getIntrinsicID(); + if (ID < Intrinsic::num_intrinsics) + OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; + else { + const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); + OS << "intrinsic(@" << TII->getName(ID) << ')'; + } + break; + } + case MachineOperand::MO_Predicate: { + auto Pred = static_cast<CmpInst::Predicate>(Op.getPredicate()); + OS << (CmpInst::isIntPredicate(Pred) ? 
"int" : "float") << "pred(" + << CmpInst::getPredicateName(Pred) << ')'; break; } } @@ -875,6 +916,8 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "volatile "; if (Op.isNonTemporal()) OS << "non-temporal "; + if (Op.isDereferenceable()) + OS << "dereferenceable "; if (Op.isInvariant()) OS << "invariant "; if (Op.isLoad()) @@ -917,6 +960,9 @@ void MIPrinter::print(const MachineMemOperand &Op) { printLLVMNameWithoutPrefix( OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); break; + case PseudoSourceValue::TargetCustom: + llvm_unreachable("TargetCustom pseudo source values are not supported"); + break; } } printOffset(Op.getOffset()); @@ -956,32 +1002,32 @@ void MIPrinter::print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI) { switch (CFI.getOperation()) { case MCCFIInstruction::OpSameValue: - OS << ".cfi_same_value "; + OS << "same_value "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); break; case MCCFIInstruction::OpOffset: - OS << ".cfi_offset "; + OS << "offset "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); OS << ", " << CFI.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: - OS << ".cfi_def_cfa_register "; + OS << "def_cfa_register "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); break; case MCCFIInstruction::OpDefCfaOffset: - OS << ".cfi_def_cfa_offset "; + OS << "def_cfa_offset "; if (CFI.getLabel()) OS << "<mcsymbol> "; OS << CFI.getOffset(); break; case MCCFIInstruction::OpDefCfa: - OS << ".cfi_def_cfa "; + OS << "def_cfa "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp index 8e7566a..c690bcf 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -33,7 +33,7 @@ struct MIRPrintingPass : public MachineFunctionPass { MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {} MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {} - const char *getPassName() const override { return "MIR Printing Pass"; } + StringRef getPassName() const override { return "MIR Printing Pass"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 689dd07..3869f97 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -51,7 +51,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); + auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + @@ -74,7 +74,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it /// gets the next available unique MBB number. If it is removed from a /// MachineFunction, it goes back to being #-1. 
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { +void ilist_callback_traits<MachineBasicBlock>::addNodeToList( + MachineBasicBlock *N) { MachineFunction &MF = *N->getParent(); N->Number = MF.addToMBBNumbering(N); @@ -85,7 +86,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { I->AddRegOperandsToUseLists(RegInfo); } -void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { +void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList( + MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; } @@ -116,15 +118,13 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { /// When moving a range of instructions from one MBB list to another, we need to /// update the parent pointers and the use/def lists. -void ilist_traits<MachineInstr>:: -transferNodesFromList(ilist_traits<MachineInstr> &FromList, - ilist_iterator<MachineInstr> First, - ilist_iterator<MachineInstr> Last) { +void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList, + instr_iterator First, + instr_iterator Last) { assert(Parent->getParent() == FromList.Parent->getParent() && "MachineInstr parent mismatch!"); - - // Splice within the same MBB -> no change. - if (Parent == FromList.Parent) return; + assert(this != &FromList && "Called without a real transfer..."); + assert(Parent != FromList.Parent && "Two lists have the same parent?"); // If splicing between two blocks within the same function, just update the // parent pointers. @@ -132,7 +132,7 @@ transferNodesFromList(ilist_traits<MachineInstr> &FromList, First->setParent(Parent); } -void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { +void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) { assert(!MI->getParent() && "MI is still in a block!"); Parent->getParent()->DeleteMachineInstr(MI); } @@ -149,12 +149,25 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { iterator E = end(); + while (I != E && (I->isPHI() || I->isPosition())) + ++I; + // FIXME: This needs to change if we wish to bundle labels + // inside the bundle. + assert((I == E || !I->isInsideBundle()) && + "First non-phi / non-label instruction is inside a bundle!"); + return I; +} + +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { + iterator E = end(); while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. assert((I == E || !I->isInsideBundle()) && - "First non-phi / non-label instruction is inside a bundle!"); + "First non-phi / non-label / non-debug " + "instruction is inside a bundle!"); return I; } @@ -178,10 +191,7 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() { // Skip over begin-of-block dbg_value instructions. 
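The replacement on the next lines calls skipDebugInstructionsForward instead of open-coding the skip. The helper behaves like the loop being deleted, roughly as in this sketch (not the actual header implementation):

// Advance past dbg_value instructions, stopping at End.
template <typename IterT>
static IterT skipDebugLike(IterT It, IterT End) {
  while (It != End && It->isDebugValue())
    ++It;
  return It;
}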
- iterator I = begin(), E = end(); - while (I != E && I->isDebugValue()) - ++I; - return I; + return skipDebugInstructionsForward(begin(), end()); } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { @@ -276,9 +286,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (!livein_empty()) { if (Indexes) OS << '\t'; OS << " Live Ins:"; - for (const auto &LI : make_range(livein_begin(), livein_end())) { + for (const auto &LI : LiveIns) { OS << ' ' << PrintReg(LI.PhysReg, TRI); - if (LI.LaneMask != ~0u) + if (!LI.LaneMask.all()) OS << ':' << PrintLaneMask(LI.LaneMask); } OS << '\n'; @@ -323,22 +333,20 @@ void MachineBasicBlock::printAsOperand(raw_ostream &OS, } void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { - LiveInVector::iterator I = std::find_if( - LiveIns.begin(), LiveIns.end(), - [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + LiveInVector::iterator I = find_if( + LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); if (I == LiveIns.end()) return; I->LaneMask &= ~LaneMask; - if (I->LaneMask == 0) + if (I->LaneMask.none()) LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { - livein_iterator I = std::find_if( - LiveIns.begin(), LiveIns.end(), - [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); - return I != livein_end() && (I->LaneMask & LaneMask) != 0; + livein_iterator I = find_if( + LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + return I != livein_end() && (I->LaneMask & LaneMask).any(); } void MachineBasicBlock::sortUniqueLiveIns() { @@ -418,7 +426,7 @@ void MachineBasicBlock::updateTerminator() { // The block has an unconditional branch. If its successor is now its // layout successor, delete the branch. if (isLayoutSuccessor(TBB)) - TII->RemoveBranch(*this); + TII->removeBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not its // layout successor, insert a branch. First we have to locate the only @@ -438,7 +446,7 @@ void MachineBasicBlock::updateTerminator() { // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); } return; } @@ -448,13 +456,13 @@ void MachineBasicBlock::updateTerminator() { // successors is its layout successor, rewrite it to a fallthrough // conditional branch. if (isLayoutSuccessor(TBB)) { - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) return; - TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, nullptr, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, FBB, nullptr, Cond, DL); } else if (isLayoutSuccessor(FBB)) { - TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); } return; } @@ -476,37 +484,37 @@ void MachineBasicBlock::updateTerminator() { // Remove the conditional jump, leaving unconditional fallthrough. // FIXME: This does not seem like a reasonable pattern to support, but it // has been seen in the wild coming out of degenerate ARM test cases. - TII->RemoveBranch(*this); + TII->removeBranch(*this); // Finally update the unconditional successor to be reached via a branch if // it would not be reached by fallthrough. 
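This hunk systematically renames the branch-editing hooks to their lower-case spellings (analyzeBranch, removeBranch, insertBranch, reverseBranchCondition). For reference, the usual analyze/remove/insert sequence under the new names looks roughly like the following; a sketch only, assuming a valid TII and DL and ignoring error handling.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Re-emit MBB's terminator with the renamed hooks; purely illustrative.
static void reemitTerminator(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             const DebugLoc &DL) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
    return; // terminator not analyzable, leave it alone
  TII->removeBranch(MBB);
  if (TBB)
    TII->insertBranch(MBB, TBB, FBB, Cond, DL);
}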
if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); return; } // We enter here iff exactly one successor is TBB which cannot fallthrough // and the rest successors if any are EHPads. In this case, we need to // change the conditional branch into unconditional branch. - TII->RemoveBranch(*this); + TII->removeBranch(*this); Cond.clear(); - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); return; } // The block has a fallthrough conditional branch. if (isLayoutSuccessor(TBB)) { - if (TII->ReverseBranchCondition(Cond)) { + if (TII->reverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL); return; } - TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL); } else if (!isLayoutSuccessor(FallthroughBB)) { - TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, TBB, FallthroughBB, Cond, DL); } } @@ -545,7 +553,7 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs) { - succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ); + succ_iterator I = find(Successors, Succ); removeSuccessor(I, NormalizeSuccProbs); } @@ -611,7 +619,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { } void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) { - pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred); + pred_iterator I = find(Predecessors, Pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -661,11 +669,11 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { } bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { - return std::find(pred_begin(), pred_end(), MBB) != pred_end(); + return is_contained(predecessors(), MBB); } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - return std::find(succ_begin(), succ_end(), MBB) != succ_end(); + return is_contained(successors(), MBB); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -775,7 +783,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, continue; unsigned Reg = OI->getReg(); - if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + if (!is_contained(UsedRegs, Reg)) UsedRegs.push_back(Reg); } } @@ -802,9 +810,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), E = Terminators.end(); I != E; ++I) { - if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == - NewTerminators.end()) - Indexes->removeMachineInstrFromMaps(**I); + if (!is_contained(NewTerminators, *I)) + Indexes->removeMachineInstrFromMaps(**I); } } @@ -813,7 +820,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!NMBB->isLayoutSuccessor(Succ)) { SmallVector<MachineOperand, 4> Cond; const TargetInstrInfo *TII = 
getParent()->getSubtarget().getInstrInfo(); - TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL); + TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); if (Indexes) { for (MachineInstr &MI : NMBB->instrs()) { @@ -1090,16 +1097,16 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, bool Changed = false; - MachineFunction::iterator FallThru = std::next(getIterator()); + MachineBasicBlock *FallThru = getNextNode(); if (!DestA && !DestB) { // Block falls through to successor. - DestA = &*FallThru; - DestB = &*FallThru; + DestA = FallThru; + DestB = FallThru; } else if (DestA && !DestB) { if (IsCond) // Block ends in conditional jump that falls through to successor. - DestB = &*FallThru; + DestB = FallThru; } else { assert(DestA && DestB && IsCond && "CFG in a bad state. Cannot correct CFG edges"); @@ -1130,17 +1137,11 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { - DebugLoc DL; - instr_iterator E = instr_end(); - if (MBBI == E) - return DL; - // Skip debug declarations, we don't want a DebugLoc from them. - while (MBBI != E && MBBI->isDebugValue()) - MBBI++; - if (MBBI != E) - DL = MBBI->getDebugLoc(); - return DL; + MBBI = skipDebugInstructionsForward(MBBI, instr_end()); + if (MBBI != instr_end()) + return MBBI->getDebugLoc(); + return {}; } /// Return probability of the edge from this block to MBB. @@ -1287,3 +1288,14 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { // care what kind of return it is, putting a mask after it is a no-op. return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr; } + +void MachineBasicBlock::clearLiveIns() { + LiveIns.clear(); +} + +MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { + assert(getParent()->getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness) && + "Liveness information is accurate"); + return LiveIns.begin(); +} diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 6c0f99f..7d5124d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -42,9 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " - "profile count if available."), - - clEnumValEnd)); + "profile count if available."))); extern cl::opt<std::string> ViewBlockFreqFuncName; extern cl::opt<unsigned> ViewHotFreqPercent; @@ -52,29 +50,26 @@ extern cl::opt<unsigned> ViewHotFreqPercent; namespace llvm { template <> struct GraphTraits<MachineBlockFrequencyInfo *> { - typedef const MachineBasicBlock NodeType; + typedef const MachineBasicBlock *NodeRef; typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; - typedef MachineFunction::const_iterator nodes_iterator; + typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator; - static inline const NodeType * - getEntryNode(const MachineBlockFrequencyInfo *G) { + static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) { return &G->getFunction()->front(); } - static ChildIteratorType child_begin(const NodeType *N) { + static ChildIteratorType child_begin(const NodeRef N) { return N->succ_begin(); } - static ChildIteratorType 
child_end(const NodeType *N) { - return N->succ_end(); - } + static ChildIteratorType child_end(const NodeRef N) { return N->succ_end(); } static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return nodes_iterator(G->getFunction()->begin()); } static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->end(); + return nodes_iterator(G->getFunction()->end()); } }; @@ -175,6 +170,12 @@ Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None; } +Optional<uint64_t> +MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { + const Function *F = MBFI->getFunction()->getFunction(); + return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None; +} + const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { return MBFI ? MBFI->getFunction() : nullptr; } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 03dda8b..40e3840 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -78,10 +79,14 @@ static cl::opt<unsigned> ExitBlockBias( "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden); +// Definition: +// - Outlining: placement of a basic block outside the chain or hot path. + static cl::opt<bool> OutlineOptionalBranches( "outline-optional-branches", - cl::desc("Put completely optional branches, i.e. branches with a common " - "post dominator, out of line."), + cl::desc("Outlining optional branches will place blocks that are optional " + "branches, i.e. branches with a common post dominator, outside " + "the hot path or chain"), cl::init(false), cl::Hidden); static cl::opt<unsigned> OutlineOptionalThreshold( @@ -117,6 +122,12 @@ static cl::opt<unsigned> MisfetchCost( static cl::opt<unsigned> JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); +static cl::opt<bool> +TailDupPlacement("tail-dup-placement", + cl::desc("Perform tail duplication during placement. " + "Creates more fallthrough opportunites in " + "outline branches."), + cl::init(true), cl::Hidden); static cl::opt<bool> BranchFoldPlacement("branch-fold-placement", @@ -124,6 +135,14 @@ BranchFoldPlacement("branch-fold-placement", "Reduces code size."), cl::init(true), cl::Hidden); +// Heuristic for tail duplication. +static cl::opt<unsigned> TailDuplicatePlacementThreshold( + "tail-dup-placement-threshold", + cl::desc("Instruction cutoff for tail duplication during layout. " + "Tail merging during layout is forced to have a threshold " + "that won't conflict."), cl::init(2), + cl::Hidden); + extern cl::opt<unsigned> StaticLikelyProb; extern cl::opt<unsigned> ProfileLikelyProb; @@ -181,6 +200,16 @@ public: /// \brief End of blocks within the chain. iterator end() { return Blocks.end(); } + bool remove(MachineBasicBlock* BB) { + for(iterator i = begin(); i != end(); ++i) { + if (*i == BB) { + Blocks.erase(i); + return true; + } + } + return false; + } + /// \brief Merge a block chain into this one. 
/// /// This routine merges a block chain into this one. It takes care of forming @@ -235,7 +264,7 @@ public: namespace { class MachineBlockPlacement : public MachineFunctionPass { /// \brief A typedef for a block filter set. - typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet; /// \brief work lists of blocks that are ready to be laid out SmallVector<MachineBasicBlock *, 16> BlockWorkList; @@ -253,6 +282,11 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the loop info. MachineLoopInfo *MLI; + /// \brief Preferred loop exit. + /// Member variable for convenience. It may be removed by duplication deep + /// in the call stack. + MachineBasicBlock *PreferredLoopExit; + /// \brief A handle to the target's instruction info. const TargetInstrInfo *TII; @@ -262,6 +296,13 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the post dominator tree. MachineDominatorTree *MDT; + /// \brief Duplicator used to duplicate tails during placement. + /// + /// Placement decisions can open up new tail duplication opportunities, but + /// since tail duplication affects placement decisions of later blocks, it + /// must be done inline. + TailDuplicator TailDup; + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate /// all terminators of the MachineFunction. SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; @@ -283,8 +324,26 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; +#ifndef NDEBUG + /// The set of basic blocks that have terminators that cannot be fully + /// analyzed. These basic blocks cannot be re-ordered safely by + /// MachineBlockPlacement, and we must preserve physical layout of these + /// blocks and their successors through the pass. + SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits; +#endif + + /// Decrease the UnscheduledPredecessors count for all blocks in chain, and + /// if the count goes to 0, add them to the appropriate work list. void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter = nullptr); + + /// Decrease the UnscheduledPredecessors count for a single block, and + /// if the count goes to 0, add them to the appropriate work list. 
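BlockFilterSet above switches from SmallPtrSet to SmallSetVector. One property that buys, and presumably the motivation, is deterministic insertion-ordered iteration on top of set-like membership and removal; a small illustration:

#include "llvm/ADT/SetVector.h"

static int orderedSum() {
  llvm::SmallSetVector<int, 16> S;
  S.insert(3);
  S.insert(1);
  S.insert(3);    // duplicate, ignored
  int Sum = 0;
  for (int V : S) // visits 3 then 1, in insertion order
    Sum += V;
  S.remove(1);    // erase by value, the operation BlockFilter->remove uses later
  return Sum;     // 4
}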
+ void markBlockSuccessors( + BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter = nullptr); + + BranchProbability collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter, @@ -294,6 +353,16 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter, BranchProbability SuccProb, BranchProbability HotProb); + bool repeatedlyTailDuplicateBlock( + MachineBasicBlock *BB, MachineBasicBlock *&LPred, + MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt); + bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred, + const BlockChain &Chain, + BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt, + bool &DuplicatedToPred); bool hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain, BranchProbability SuccProb, @@ -319,7 +388,7 @@ class MachineBlockPlacement : public MachineFunctionPass { SmallPtrSetImpl<BlockChain *> &UpdatedPreds, const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter = nullptr); + BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineLoop &L, @@ -384,37 +453,49 @@ static std::string getBlockName(MachineBasicBlock *BB) { /// When a chain is being merged into the "placed" chain, this routine will /// quickly walk the successors of each block in the chain and mark them as /// having one fewer active predecessor. It also adds any successors of this -/// chain which reach the zero-predecessor state to the worklist passed in. +/// chain which reach the zero-predecessor state to the appropriate worklist. void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. for (MachineBasicBlock *MBB : Chain) { - // Add any successors for which this is the only un-placed in-loop - // predecessor to the worklist as a viable candidate for CFG-neutral - // placement. No subsequent placement of this block will violate the CFG - // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock *Succ : MBB->successors()) { - if (BlockFilter && !BlockFilter->count(Succ)) - continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || Succ == LoopHeaderBB) - continue; + markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter); + } +} - // This is a cross-chain edge that is within the loop, so decrement the - // loop predecessor count of the destination chain. - if (SuccChain.UnscheduledPredecessors == 0 || - --SuccChain.UnscheduledPredecessors > 0) - continue; +/// \brief Mark a single block's successors as having one fewer preds. +/// +/// Under normal circumstances, this is only called by markChainSuccessors, +/// but if a block that was to be placed is completely tail-duplicated away, +/// and was duplicated into the chain end, we need to redo markBlockSuccessors +/// for just that block. 
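The counting rule described in the comments above (decrement a chain's UnscheduledPredecessors and enqueue its head when the count reaches zero) can be made concrete with a toy rendering; the Toy types here are hypothetical stand-ins for BlockChain and MachineBasicBlock, not the pass's data structures.

#include <vector>

struct ToyChain;
struct ToyNode { ToyChain *Chain = nullptr; std::vector<ToyNode *> Succs; };
struct ToyChain { unsigned UnscheduledPredecessors = 0; ToyNode *Head = nullptr; };

// When BB is placed, each successor chain loses one unscheduled predecessor;
// chains that reach zero become layout candidates.
static void markPlaced(ToyNode *BB, std::vector<ToyNode *> &WorkList) {
  for (ToyNode *Succ : BB->Succs) {
    ToyChain *SC = Succ->Chain;
    if (SC == BB->Chain || SC->UnscheduledPredecessors == 0)
      continue;
    if (--SC->UnscheduledPredecessors == 0)
      WorkList.push_back(SC->Head);
  }
}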
+void MachineBlockPlacement::markBlockSuccessors( + BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) + continue; + BlockChain &SuccChain = *BlockToChain[Succ]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || Succ == LoopHeaderBB) + continue; - auto *MBB = *SuccChain.begin(); - if (MBB->isEHPad()) - EHPadWorkList.push_back(MBB); - else - BlockWorkList.push_back(MBB); - } + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.UnscheduledPredecessors == 0 || + --SuccChain.UnscheduledPredecessors > 0) + continue; + + auto *NewBB = *SuccChain.begin(); + if (NewBB->isEHPad()) + EHPadWorkList.push_back(NewBB); + else + BlockWorkList.push_back(NewBB); } } @@ -627,16 +708,46 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // BB->Succ. This is equivalent to looking the CFG backward with backward // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without // profile data). - + // -------------------------------------------------------------------------- + // Case 3: forked diamond + // S + // / \ + // / \ + // BB Pred + // | \ / | + // | \ / | + // | X | + // | / \ | + // | / \ | + // S1 S2 + // + // The current block is BB and edge BB->S1 is now being evaluated. + // As above S->BB was already selected because + // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2). + // + // topo-order: + // + // S-------| ---S + // | | | | + // ---BB | | BB + // | | | | + // | Pred----| | S1---- + // | | | | + // --(S1 or S2) ---Pred-- + // + // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2) + // + min(freq(Pred->S1), freq(Pred->S2)) + // Non-topo-order cost: + // In the worst case, S2 will not get laid out after Pred. + // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2). + // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2)) + // is 0. Then the non topo layout is better when + // freq(S->Pred) < freq(BB->S1). + // This is exactly what is checked below. + // Note there are other shapes that apply (Pred may not be a single block, + // but they all fit this general pattern.) BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB); - // Forward checking. For case 2, SuccProb will be 1. - if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb - << " (prob) (CFG conflict)\n"); - return true; - } - // Make sure that a hot successor doesn't have a globally more // important predecessor. BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; @@ -647,11 +758,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( (BlockFilter && !BlockFilter->count(Pred)) || BlockToChain[Pred] == &Chain) continue; - // Do backward checking. For case 1, it is actually redundant check. For - // case 2 above, we need a backward checking to filter out edges that are - // not 'strongly' biased. 
With profile data available, the check is mostly - // redundant too (when threshold prob is set at 50%) unless S has more than - // two successors. + // Do backward checking. + // For all cases above, we need a backward checking to filter out edges that + // are not 'strongly' biased. With profile data available, the check is + // mostly redundant for case 2 (when threshold prob is set at 50%) unless S + // has more than two successors. // BB Pred // \ / // Succ @@ -660,6 +771,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) * // HotProb // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb + // Case 1 is covered too, because the first equation reduces to: + // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle) BlockFrequency PredEdgeFreq = MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) { @@ -669,7 +782,7 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (non-cold CFG conflict)\n"); return true; } @@ -699,7 +812,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, auto AdjustedSumProb = collectViableSuccessors(BB, Chain, BlockFilter, Successors); - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n"); for (MachineBasicBlock *Succ : Successors) { auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); BranchProbability SuccProb = @@ -718,15 +831,23 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, continue; DEBUG( - dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb - << " (prob)" + dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " + << SuccProb << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc && BestProb >= SuccProb) + + if (BestSucc && BestProb >= SuccProb) { + DEBUG(dbgs() << " Not the best candidate, continuing\n"); continue; + } + + DEBUG(dbgs() << " Setting it as best candidate\n"); BestSucc = Succ; BestProb = SuccProb; } + if (BestSucc) + DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n"); + return BestSucc; } @@ -746,10 +867,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. 
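The hunk that follows rewrites the worklist pruning with the range form of remove_if from llvm/ADT/STLExtras.h. The idiom on a plain container, as a sketch:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// Erase-remove with the range helper: drop every element matching the predicate.
static void dropNegatives(std::vector<int> &WorkList) {
  WorkList.erase(llvm::remove_if(WorkList, [](int V) { return V < 0; }),
                 WorkList.end());
}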
- WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + WorkList.erase(remove_if(WorkList, + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); if (WorkList.empty()) @@ -858,7 +979,7 @@ void MachineBlockPlacement::fillWorkLists( void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { + BlockFilterSet *BlockFilter) { assert(BB && "BB must not be null.\n"); assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); @@ -893,6 +1014,17 @@ void MachineBlockPlacement::buildChain( "layout successor until the CFG reduces\n"); } + // Placement may have changed tail duplication opportunities. + // Check for that now. + if (TailDupPlacement && BestSucc) { + // If the chosen successor was duplicated into all its predecessors, + // don't bother laying it out, just go round the loop again with BB as + // the chain end. + if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, + BlockFilter, PrevUnplacedBlockIt)) + continue; + } + // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; // Zero out UnscheduledPredecessors for the successor we're about to merge in case @@ -922,6 +1054,16 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock * MachineBlockPlacement::findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + // Placing the latch block before the header may introduce an extra branch + // that skips this block the first time the loop is executed, which we want + // to avoid when optimising for size. + // FIXME: in theory there is a case that does not introduce a new branch, + // i.e. when the layout predecessor does not fallthrough to the loop header. + // In practice this never happens though: there always seems to be a preheader + // that can fallthrough and that is also placed before the header. + if (F->getFunction()->optForSize()) + return L.getHeader(); + // Check that the header hasn't been fused with a preheader block due to // crazy branches. If it has, we need to start with the header at the top to // prevent pulling the preheader into the loop body. @@ -937,7 +1079,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; - DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has " << Pred->succ_size() << " successors, "; MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) @@ -1066,8 +1208,14 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L, } // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. 
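The exiting block selected here feeds rotateLoop further down, which rotates the chain so that block ends up at the bottom of the layout. The effect is the same as this stand-alone std::rotate sketch on plain ints (an analogue, not the pass's code):

#include <algorithm>
#include <iterator>
#include <vector>

// Everything after the exiting block moves to the front; the exit ends up last.
static void rotateToBottom(std::vector<int> &Chain, int ExitingBlock) {
  auto It = std::find(Chain.begin(), Chain.end(), ExitingBlock);
  if (It == Chain.end())
    return;
  std::rotate(Chain.begin(), std::next(It), Chain.end());
}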
- if (!ExitingBB || L.getNumBlocks() == 1) + if (!ExitingBB) { + DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n"); return nullptr; + } + if (L.getNumBlocks() == 1) { + DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n"); + return nullptr; + } // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop @@ -1116,8 +1264,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, } } - BlockChain::iterator ExitIt = - std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); + BlockChain::iterator ExitIt = find(LoopChain, ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -1140,7 +1287,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { auto HeaderBB = L.getHeader(); - auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB); + auto HeaderIter = find(LoopChain, HeaderBB); auto RotationPos = LoopChain.end(); BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -1340,9 +1487,8 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. - MachineBasicBlock *ExitingBB = nullptr; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) - ExitingBB = findBestLoopExit(L, LoopBlockSet); + PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -1361,7 +1507,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { if (RotateLoopWithProfile) rotateLoopWithProfile(LoopChain, L, LoopBlockSet); else - rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -1374,7 +1520,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { } for (MachineBasicBlock *ChainBB : LoopChain) { dbgs() << " ... " << getBlockName(ChainBB) << "\n"; - if (!LoopBlockSet.erase(ChainBB)) { + if (!LoopBlockSet.remove(ChainBB)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough // from a loop block to a non-loop block or vice versa. @@ -1451,6 +1597,9 @@ void MachineBlockPlacement::buildCFGChains() { << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); Chain->merge(NextBB, nullptr); +#ifndef NDEBUG + BlocksWithUnanalyzableExits.insert(&*BB); +#endif FI = NextFI; BB = NextBB; } @@ -1460,6 +1609,7 @@ void MachineBlockPlacement::buildCFGChains() { collectMustExecuteBBs(); // Build any loop-based chains. + PreferredLoopExit = nullptr; for (MachineLoop *L : *MLI) buildLoopChains(*L); @@ -1522,6 +1672,19 @@ void MachineBlockPlacement::buildCFGChains() { Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. +#ifndef NDEBUG + if (!BlocksWithUnanalyzableExits.count(PrevBB)) { + // Given the exact block placement we chose, we may actually not _need_ to + // be able to edit PrevBB's terminator sequence, but not being _able_ to + // do that at this point is a bug. 
+ assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) || + !PrevBB->canFallThrough()) && + "Unexpected block with un-analyzable fallthrough!"); + Cond.clear(); + TBB = FBB = nullptr; + } +#endif + // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explicit "goto" instruction // before layout, and no longer fall-through it after layout; or @@ -1576,15 +1739,15 @@ void MachineBlockPlacement::optimizeBranches() { if (TBB && !Cond.empty() && FBB && MBPI->getEdgeProbability(ChainBB, FBB) > MBPI->getEdgeProbability(ChainBB, TBB) && - !TII->ReverseBranchCondition(Cond)) { + !TII->reverseBranchCondition(Cond)) { DEBUG(dbgs() << "Reverse order of the two branches: " << getBlockName(ChainBB) << "\n"); DEBUG(dbgs() << " Edge probability: " << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " << MBPI->getEdgeProbability(ChainBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*ChainBB); - TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl); + TII->removeBranch(*ChainBB); + TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl); ChainBB->updateTerminator(); } } @@ -1659,6 +1822,175 @@ void MachineBlockPlacement::alignBlocks() { } } +/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if +/// it was duplicated into its chain predecessor and removed. +/// \p BB - Basic block that may be duplicated. +/// +/// \p LPred - Chosen layout predecessor of \p BB. +/// Updated to be the chain end if LPred is removed. +/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong. +/// \p BlockFilter - Set of blocks that belong to the loop being laid out. +/// Used to identify which blocks to update predecessor +/// counts. +/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was +/// chosen in the given order due to unnatural CFG +/// only needed if \p BB is removed and +/// \p PrevUnplacedBlockIt pointed to \p BB. +/// @return true if \p BB was removed. +bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( + MachineBasicBlock *BB, MachineBasicBlock *&LPred, + MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt) { + bool Removed, DuplicatedToLPred; + bool DuplicatedToOriginalLPred; + Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter, + PrevUnplacedBlockIt, + DuplicatedToLPred); + if (!Removed) + return false; + DuplicatedToOriginalLPred = DuplicatedToLPred; + // Iteratively try to duplicate again. It can happen that a block that is + // duplicated into is still small enough to be duplicated again. + // No need to call markBlockSuccessors in this case, as the blocks being + // duplicated from here on are already scheduled. + // Note that DuplicatedToLPred always implies Removed. + while (DuplicatedToLPred) { + assert (Removed && "Block must have been removed to be duplicated into its " + "layout predecessor."); + MachineBasicBlock *DupBB, *DupPred; + // The removal callback causes Chain.end() to be updated when a block is + // removed. On the first pass through the loop, the chain end should be the + // same as it was on function entry. On subsequent passes, because we are + // duplicating the block at the end of the chain, if it is removed the + // chain will have shrunk by one block. + BlockChain::iterator ChainEnd = Chain.end(); + DupBB = *(--ChainEnd); + // Now try to duplicate again. 
+ if (ChainEnd == Chain.begin()) + break; + DupPred = *std::prev(ChainEnd); + Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter, + PrevUnplacedBlockIt, + DuplicatedToLPred); + } + // If BB was duplicated into LPred, it is now scheduled. But because it was + // removed, markChainSuccessors won't be called for its chain. Instead we + // call markBlockSuccessors for LPred to achieve the same effect. This must go + // at the end because repeating the tail duplication can increase the number + // of unscheduled predecessors. + LPred = *std::prev(Chain.end()); + if (DuplicatedToOriginalLPred) + markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter); + return true; +} + +/// Tail duplicate \p BB into (some) predecessors if profitable. +/// \p BB - Basic block that may be duplicated +/// \p LPred - Chosen layout predecessor of \p BB +/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong. +/// \p BlockFilter - Set of blocks that belong to the loop being laid out. +/// Used to identify which blocks to update predecessor +/// counts. +/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was +/// chosen in the given order due to unnatural CFG +/// only needed if \p BB is removed and +/// \p PrevUnplacedBlockIt pointed to \p BB. +/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will +/// only be true if the block was removed. +/// \return - True if the block was duplicated into all preds and removed. +bool MachineBlockPlacement::maybeTailDuplicateBlock( + MachineBasicBlock *BB, MachineBasicBlock *LPred, + const BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt, + bool &DuplicatedToLPred) { + + DuplicatedToLPred = false; + DEBUG(dbgs() << "Redoing tail duplication for Succ#" + << BB->getNumber() << "\n"); + bool IsSimple = TailDup.isSimpleBB(BB); + // Blocks with single successors don't create additional fallthrough + // opportunities. Don't duplicate them. TODO: When conditional exits are + // analyzable, allow them to be duplicated. + if (!IsSimple && BB->succ_size() == 1) + return false; + if (!TailDup.shouldTailDuplicate(IsSimple, *BB)) + return false; + // This has to be a callback because none of it can be done after + // BB is deleted. + bool Removed = false; + auto RemovalCallback = + [&](MachineBasicBlock *RemBB) { + // Signal to outer function + Removed = true; + + // Conservative default. + bool InWorkList = true; + // Remove from the Chain and Chain Map + if (BlockToChain.count(RemBB)) { + BlockChain *Chain = BlockToChain[RemBB]; + InWorkList = Chain->UnscheduledPredecessors == 0; + Chain->remove(RemBB); + BlockToChain.erase(RemBB); + } + + // Handle the unplaced block iterator + if (&(*PrevUnplacedBlockIt) == RemBB) { + PrevUnplacedBlockIt++; + } + + // Handle the Work Lists + if (InWorkList) { + SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList; + if (RemBB->isEHPad()) + RemoveList = EHPadWorkList; + RemoveList.erase( + remove_if(RemoveList, + [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}), + RemoveList.end()); + } + + // Handle the filter set + if (BlockFilter) { + BlockFilter->remove(RemBB); + } + + // Remove the block from loop info. 
+ MLI->removeBlock(RemBB); + if (RemBB == PreferredLoopExit) + PreferredLoopExit = nullptr; + + DEBUG(dbgs() << "TailDuplicator deleted block: " + << getBlockName(RemBB) << "\n"); + }; + auto RemovalCallbackRef = + llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback); + + SmallVector<MachineBasicBlock *, 8> DuplicatedPreds; + TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, + &DuplicatedPreds, &RemovalCallbackRef); + + // Update UnscheduledPredecessors to reflect tail-duplication. + DuplicatedToLPred = false; + for (MachineBasicBlock *Pred : DuplicatedPreds) { + // We're only looking for unscheduled predecessors that match the filter. + BlockChain* PredChain = BlockToChain[Pred]; + if (Pred == LPred) + DuplicatedToLPred = true; + if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) + || PredChain == &Chain) + continue; + for (MachineBasicBlock *NewSucc : Pred->successors()) { + if (BlockFilter && !BlockFilter->count(NewSucc)) + continue; + BlockChain *NewChain = BlockToChain[NewSucc]; + if (NewChain != &Chain && NewChain != PredChain) + NewChain->UnscheduledPredecessors++; + } + } + return Removed; +} + bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -1675,6 +2007,18 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MDT = &getAnalysis<MachineDominatorTree>(); + + // Initialize PreferredLoopExit to nullptr here since it may never be set if + // there are no MachineLoops. + PreferredLoopExit = nullptr; + + if (TailDupPlacement) { + unsigned TailDupSize = TailDuplicatePlacementThreshold; + if (MF.getFunction()->optForSize()) + TailDupSize = 1; + TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); + } + assert(BlockToChain.empty()); buildCFGChains(); @@ -1688,14 +2032,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFoldPlacement; // No tail merging opportunities if the block number is less than four. if (MF.size() > 3 && EnableTailMerge) { + unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI); + *MBPI, TailMergeSize); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>(), MLI, /*AfterBlockPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); + // Must redo the dominator tree if blocks were changed. + MDT->runOnMachineFunction(MF); ChainAllocator.DestroyAll(); buildCFGChains(); } diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index fe73406..21eff9d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -50,8 +50,7 @@ BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // This is a linear search. Try to use the const_succ_iterator version when // possible. 
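RemovalCallback in the placement hunk above is handed to the tail duplicator through llvm::function_ref, a non-owning view of a callable that is cheap to pass by value but must not outlive what it refers to. A minimal sketch of the pattern, with hypothetical names:

#include "llvm/ADT/STLExtras.h"

static void notifyRemoved(llvm::function_ref<void(int)> OnRemove, int Id) {
  OnRemove(Id);
}

static int countRemovals() {
  int Removed = 0;
  auto Callback = [&Removed](int) { ++Removed; };
  notifyRemoved(Callback, 42); // function_ref only views Callback, no copy made
  return Removed;              // 1
}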
- return getEdgeProbability(Src, - std::find(Src->succ_begin(), Src->succ_end(), Dst)); + return getEdgeProbability(Src, find(Src->successors(), Dst)); } bool MachineBranchProbabilityInfo::isEdgeHot( diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 1209f73..0766f46 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -177,8 +177,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. - while (I != E && I->isDebugValue()) - ++I; + I = skipDebugInstructionsForward(I, E); if (I == E) // Reached end of block, register is obviously dead. @@ -227,7 +226,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; // Reading constant physregs is ok. - if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + if (!MRI->isConstantPhysReg(Reg)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } @@ -346,7 +345,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!MI->isInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad(AA)) // FIXME: we should be able to hoist loads with no other side effects if // there are no other instructions which can change memory in this loop. // This is a trivial form of alias analysis. diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 6b5c6ba..5beed5f 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -56,7 +56,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { return "Machine InstCombiner"; } + StringRef getPassName() const override { return "Machine InstCombiner"; } private: bool doSubstitute(unsigned NewSize, unsigned OldSize); @@ -71,6 +71,7 @@ private: improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, @@ -134,7 +135,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth for (auto *InstrPtr : InsInstrs) { // for each Use unsigned IDepth = 0; - DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";); for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. 
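getPassName() here, as earlier for the MIR printing pass, now returns StringRef rather than const char *. An overriding pass only changes the return type; sketch with a made-up pass name:

#include "llvm/ADT/StringRef.h"

struct ExamplePassLike {
  // The literal has static storage duration, so returning a StringRef to it is safe.
  llvm::StringRef getPassName() const { return "Example Combiner Pass"; }
};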
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) @@ -242,6 +243,7 @@ bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineCombinerPattern Pattern) { assert(TSchedModel.hasInstrSchedModelOrItineraries() && @@ -269,8 +271,13 @@ bool MachineCombiner::improvesCriticalPathLen( // A more flexible cost calculation for the critical path includes the slack // of the original code sequence. This may allow the transform to proceed // even if the instruction depths (data dependency cycles) become worse. + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootLatency = 0; + + for (auto I : DelInstrs) + RootLatency += TSchedModel.computeInstrLatency(I); + unsigned RootSlack = BlockTrace.getInstrSlack(*Root); DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; @@ -421,7 +428,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // resource pressure. if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) || (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg, P) && + DelInstrs, InstrIdxForVirtReg, P) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { for (auto *InstrPtr : InsInstrs) MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 8fdf39d..5de6dec 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -56,11 +56,12 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: void ClobberRegister(unsigned Reg); + void ReadRegister(unsigned Reg); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); @@ -120,6 +121,18 @@ void MachineCopyPropagation::ClobberRegister(unsigned Reg) { } } +void MachineCopyPropagation::ReadRegister(unsigned Reg) { + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + Reg2MIMap::iterator CI = CopyMap.find(*AI); + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); + MaybeDeadCopies.remove(CI->second); + } + } +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. /// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -212,12 +225,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, the previous copy cannot be // eliminated. 
- for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); - MaybeDeadCopies.remove(CI->second); - } + ReadRegister(Src); + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + ReadRegister(Reg); } DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); @@ -234,6 +249,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // ... // %xmm2<def> = copy %xmm9 ClobberRegister(Def); + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + ClobberRegister(Reg); + } // Remember Def is defined by the copy. for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); @@ -245,7 +268,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. RegList &DestList = SrcMap[Src]; - if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end()) + if (!is_contained(DestList, Def)) DestList.push_back(Def); continue; @@ -268,17 +291,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { if (MO.isDef()) { Defs.push_back(Reg); - continue; - } - - // If 'Reg' is defined by a copy, the copy is no longer a candidate - // for elimination. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); - MaybeDeadCopies.remove(CI->second); - } + } else { + ReadRegister(Reg); } // Treat undef use like defs for copy propagation but not for // dead copy. We would need to do a liveness check to be sure the copy diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index a7c63ef..c1d5ea9 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -54,28 +54,29 @@ static cl::opt<unsigned> void MachineFunctionInitializer::anchor() {} -void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const { - // Leave this function even in NDEBUG as an out-of-line anchor. -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - for (BitVector::size_type i = 0; i < Properties.size(); ++i) { - bool HasProperty = Properties[i]; - if (OnlySet && !HasProperty) +static const char *getPropertyName(MachineFunctionProperties::Property Prop) { + typedef MachineFunctionProperties::Property P; + switch(Prop) { + case P::FailedISel: return "FailedISel"; + case P::IsSSA: return "IsSSA"; + case P::Legalized: return "Legalized"; + case P::NoPHIs: return "NoPHIs"; + case P::NoVRegs: return "NoVRegs"; + case P::RegBankSelected: return "RegBankSelected"; + case P::Selected: return "Selected"; + case P::TracksLiveness: return "TracksLiveness"; + } + llvm_unreachable("Invalid machine function property"); +} + +void MachineFunctionProperties::print(raw_ostream &OS) const { + const char *Separator = ""; + for (BitVector::size_type I = 0; I < Properties.size(); ++I) { + if (!Properties[I]) continue; - switch(static_cast<Property>(i)) { - case Property::IsSSA: - ROS << (HasProperty ? 
"SSA, " : "Post SSA, "); - break; - case Property::TracksLiveness: - ROS << (HasProperty ? "" : "not ") << "tracking liveness, "; - break; - case Property::AllVRegsAllocated: - ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs"); - break; - default: - break; - } + OS << Separator << getPropertyName(static_cast<Property>(I)); + Separator = ", "; } -#endif } //===----------------------------------------------------------------------===// @@ -85,7 +86,7 @@ void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const { // Out-of-line virtual method. MachineFunctionInfo::~MachineFunctionInfo() {} -void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { +void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } @@ -100,6 +101,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { + FunctionNumber = FunctionNum; + init(); +} + +void MachineFunction::init() { // Assume the function starts in SSA form with correct liveness. Properties.set(MachineFunctionProperties::Property::IsSSA); Properties.set(MachineFunctionProperties::Property::TracksLiveness); @@ -112,11 +118,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && - !F->hasFnAttribute("no-realign-stack"); + !Fn->hasFnAttribute("no-realign-stack"); FrameInfo = new (Allocator) MachineFrameInfo( getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP, /*ForceRealign=*/CanRealignSP && - F->hasFnAttribute(Attribute::StackAlignment)); + Fn->hasFnAttribute(Attribute::StackAlignment)); if (Fn->hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); @@ -133,15 +139,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, if (AlignAllFunctions) Alignment = AlignAllFunctions; - FunctionNumber = FunctionNum; JumpTableInfo = nullptr; if (isFuncletEHPersonality(classifyEHPersonality( - F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) { + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) { WinEHInfo = new (Allocator) WinEHFuncInfo(); } - assert(TM.isCompatibleDataLayout(getDataLayout()) && + assert(Target.isCompatibleDataLayout(getDataLayout()) && "Can't create a MachineFunction using a Module with a " "Target-incompatible DataLayout attached\n"); @@ -149,6 +154,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, } MachineFunction::~MachineFunction() { + clear(); +} + +void MachineFunction::clear() { + Properties.reset(); // Don't call destructors on MachineInstr and MachineOperand. All of their // memory comes from the BumpPtrAllocator which is about to be purged. 
// @@ -296,9 +306,12 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, - unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) { + unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, + SynchronizationScope SynchScope, AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) { return new (Allocator) - MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges); + MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, + SynchScope, Ordering, FailureOrdering); } MachineMemOperand * @@ -308,13 +321,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), - MMO->getFlags(), Size, - MMO->getBaseAlignment()); + MMO->getFlags(), Size, MMO->getBaseAlignment(), + AAMDNodes(), nullptr, MMO->getSynchScope(), + MMO->getOrdering(), MMO->getFailureOrdering()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), - MMO->getFlags(), Size, - MMO->getBaseAlignment()); + MMO->getFlags(), Size, MMO->getBaseAlignment(), + AAMDNodes(), nullptr, MMO->getSynchScope(), + MMO->getOrdering(), MMO->getFailureOrdering()); } MachineInstr::mmo_iterator @@ -345,7 +360,9 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getAAInfo()); + (*I)->getAAInfo(), nullptr, + (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getFailureOrdering()); Result[Index] = JustLoad; } ++Index; @@ -377,7 +394,9 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getAAInfo()); + (*I)->getAAInfo(), nullptr, + (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getFailureOrdering()); Result[Index] = JustStore; } ++Index; @@ -406,9 +425,8 @@ StringRef MachineFunction::getName() const { void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << "# Machine code for function " << getName() << ": "; - OS << "Properties: <"; getProperties().print(OS); - OS << ">\n"; + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); @@ -535,8 +553,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix() - : DL.getPrivateGlobalPrefix(); + StringRef Prefix = isLinkerPrivate ? 
DL.getLinkerPrivateGlobalPrefix() + : DL.getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -550,6 +568,193 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { Twine(getFunctionNumber()) + "$pb"); } +/// \name Exception Handling +/// \{ + +LandingPadInfo & +MachineFunction::getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad) { + unsigned N = LandingPads.size(); + for (unsigned i = 0; i < N; ++i) { + LandingPadInfo &LP = LandingPads[i]; + if (LP.LandingPadBlock == LandingPad) + return LP; + } + + LandingPads.push_back(LandingPadInfo(LandingPad)); + return LandingPads[N]; +} + +void MachineFunction::addInvoke(MachineBasicBlock *LandingPad, + MCSymbol *BeginLabel, MCSymbol *EndLabel) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.BeginLabels.push_back(BeginLabel); + LP.EndLabels.push_back(EndLabel); +} + +MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { + MCSymbol *LandingPadLabel = Ctx.createTempSymbol(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.LandingPadLabel = LandingPadLabel; + return LandingPadLabel; +} + +void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalValue *> TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + for (unsigned N = TyInfo.size(); N; --N) + LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); +} + +void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalValue *> TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + std::vector<unsigned> IdsInFilter(TyInfo.size()); + for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) + IdsInFilter[I] = getTypeIDFor(TyInfo[I]); + LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); +} + +void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { + for (unsigned i = 0; i != LandingPads.size(); ) { + LandingPadInfo &LandingPad = LandingPads[i]; + if (LandingPad.LandingPadLabel && + !LandingPad.LandingPadLabel->isDefined() && + (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) + LandingPad.LandingPadLabel = nullptr; + + // Special case: we *should* emit LPs with null LP MBB. This indicates + // "nounwind" case. + if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { + MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; + MCSymbol *EndLabel = LandingPad.EndLabels[j]; + if ((BeginLabel->isDefined() || + (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || + (LPMap && (*LPMap)[EndLabel] != 0))) continue; + + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + --j; + --e; + } + + // Remove landing pads with no try-ranges. + if (LandingPads[i].BeginLabels.empty()) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + // If there is no landing pad, ensure that the list of typeids is empty. + // If the only typeid is a cleanup, this is the same as having no typeids. 
+ if (!LandingPad.LandingPadBlock || + (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) + LandingPad.TypeIds.clear(); + ++i; + } +} + +void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.TypeIds.push_back(0); +} + +void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad, + const Function *Filter, + const BlockAddress *RecoverBA) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Filter; + Handler.RecoverBA = RecoverBA; + LP.SEHHandlers.push_back(Handler); +} + +void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad, + const Function *Cleanup) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Cleanup; + Handler.RecoverBA = nullptr; + LP.SEHHandlers.push_back(Handler); +} + +void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym, + ArrayRef<unsigned> Sites) { + LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); +} + +unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) { + for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) + if (TypeInfos[i] == TI) return i + 1; + + TypeInfos.push_back(TI); + return TypeInfos.size(); +} + +int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) { + // If the new filter coincides with the tail of an existing filter, then + // re-use the existing filter. Folding filters more than this requires + // re-ordering filters and/or their elements - probably not worth it. + for (std::vector<unsigned>::iterator I = FilterEnds.begin(), + E = FilterEnds.end(); I != E; ++I) { + unsigned i = *I, j = TyIds.size(); + + while (i && j) + if (FilterIds[--i] != TyIds[--j]) + goto try_next; + + if (!j) + // The new filter coincides with range [i, end) of the existing filter. + return -(1 + i); + +try_next:; + } + + // Add the new filter. + int FilterID = -(1 + FilterIds.size()); + FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); + FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); + FilterEnds.push_back(FilterIds.size()); + FilterIds.push_back(0); // terminator + return FilterID; +} + +void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) { + MachineFunction &MF = *MBB.getParent(); + if (const auto *PF = dyn_cast<Function>( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MF.getMMI().addPersonality(PF); + + if (I.isCleanup()) + MF.addCleanup(&MBB); + + // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, + // but we need to do it this way because of how the DWARF EH emitter + // processes the clauses. + for (unsigned i = I.getNumClauses(); i != 0; --i) { + Value *Val = I.getClause(i - 1); + if (I.isCatch(i - 1)) { + MF.addCatchTypeInfo(&MBB, + dyn_cast<GlobalValue>(Val->stripPointerCasts())); + } else { + // Add filters in a list. 
+ Constant *CVal = cast<Constant>(Val); + SmallVector<const GlobalValue *, 4> FilterList; + for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); + II != IE; ++II) + FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); + + MF.addFilterTypeInfo(&MBB, FilterList); + } + } +} + +/// \} + //===----------------------------------------------------------------------===// // MachineFrameInfo implementation //===----------------------------------------------------------------------===// @@ -634,11 +839,11 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, /// Create a spill slot at a fixed location on the stack. /// Returns an index with a negative value. int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, - int64_t SPOffset) { + int64_t SPOffset, + bool Immutable) { unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, - /*Immutable*/ true, + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ true, /*Alloca*/ nullptr, /*isAliased*/ false)); @@ -890,13 +1095,20 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { } MachineConstantPool::~MachineConstantPool() { + // A constant may be a member of both Constants and MachineCPVsSharingEntries, + // so keep track of which we've deleted to avoid double deletions. + DenseSet<MachineConstantPoolValue*> Deleted; for (unsigned i = 0, e = Constants.size(); i != e; ++i) - if (Constants[i].isMachineConstantPoolEntry()) + if (Constants[i].isMachineConstantPoolEntry()) { + Deleted.insert(Constants[i].Val.MachineCPVal); delete Constants[i].Val.MachineCPVal; + } for (DenseSet<MachineConstantPoolValue*>::iterator I = MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end(); - I != E; ++I) - delete *I; + I != E; ++I) { + if (Deleted.count(*I) == 0) + delete *I; + } } /// Test whether the given two constants can be allocated the same constant pool diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp deleted file mode 100644 index 338cd1e..0000000 --- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the definitions of the MachineFunctionAnalysis members. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" -using namespace llvm; - -char MachineFunctionAnalysis::ID = 0; - -MachineFunctionAnalysis::MachineFunctionAnalysis( - const TargetMachine &tm, MachineFunctionInitializer *MFInitializer) - : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) { - initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); -} - -MachineFunctionAnalysis::~MachineFunctionAnalysis() { - releaseMemory(); - assert(!MF && "MachineFunctionAnalysis left initialized!"); -} - -void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); -} - -bool MachineFunctionAnalysis::doInitialization(Module &M) { - MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMI && "MMI not around yet??"); - MMI->setModule(&M); - NextFnNum = 0; - return false; -} - - -bool MachineFunctionAnalysis::runOnFunction(Function &F) { - assert(!MF && "MachineFunctionAnalysis already initialized!"); - MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis<MachineModuleInfo>()); - if (MFInitializer) - MFInitializer->initializeMachineFunction(*MF); - return false; -} - -void MachineFunctionAnalysis::releaseMemory() { - delete MF; - MF = nullptr; -} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 228fe17..2265676 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Dominators.h" @@ -41,7 +41,9 @@ bool MachineFunctionPass::runOnFunction(Function &F) { if (F.hasAvailableExternallyLinkage()) return false; - MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunctionProperties &MFProps = MF.getProperties(); #ifndef NDEBUG @@ -49,7 +51,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { errs() << "MachineFunctionProperties required by " << getPassName() << " pass are not met by function " << F.getName() << ".\n" << "Required properties: "; - RequiredProperties.print(errs(), /*OnlySet=*/true); + RequiredProperties.print(errs()); errs() << "\nCurrent properties: "; MFProps.print(errs()); errs() << "\n"; @@ -60,13 +62,13 @@ bool MachineFunctionPass::runOnFunction(Function &F) { bool RV = runOnMachineFunction(MF); MFProps.set(SetProperties); - MFProps.clear(ClearedProperties); + MFProps.reset(ClearedProperties); return RV; } void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineFunctionAnalysis>(); - AU.addPreserved<MachineFunctionAnalysis>(); + AU.addRequired<MachineModuleInfo>(); + AU.addPreserved<MachineModuleInfo>(); // MachineFunctionPass preserves all LLVM IR passes, but there's no // high-level way to express this. 
Instead, just list a bunch of
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 4f424ff..0d533c3 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -34,7 +34,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
   MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
       : MachineFunctionPass(ID), OS(os), Banner(banner) {}

-  const char *getPassName() const override { return "MachineFunction Printer"; }
+  StringRef getPassName() const override { return "MachineFunction Printer"; }

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 3cdf8d2..2f2e3b3 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -40,6 +41,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
@@ -91,6 +93,8 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
     // Note that getSubReg() may return 0 if the sub-register doesn't exist.
     // That won't happen in legal code.
     setSubReg(0);
+    if (isDef())
+      setIsUndef(false);
   }
   setReg(Reg);
 }
@@ -171,6 +175,16 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
   Contents.Sym = Sym;
 }

+void MachineOperand::ChangeToFrameIndex(int Idx) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into a FrameIndex");
+
+  removeRegFromUses();
+
+  OpKind = MO_FrameIndex;
+  setIndex(Idx);
+}
+
 /// ChangeToRegister - Replace this operand with a new register operand of
 /// the specified value. If an operand is known to be a register already,
 /// the setReg method should be used.
@@ -256,6 +270,10 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getCFIIndex() == Other.getCFIIndex(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); + case MachineOperand::MO_IntrinsicID: + return getIntrinsicID() == Other.getIntrinsicID(); + case MachineOperand::MO_Predicate: + return getPredicate() == Other.getPredicate(); } llvm_unreachable("Invalid machine operand type"); } @@ -300,18 +318,23 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); case MachineOperand::MO_CFIIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); } llvm_unreachable("Invalid machine operand type"); } -void MachineOperand::print(raw_ostream &OS, - const TargetRegisterInfo *TRI) const { +void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TRI); + print(OS, DummyMST, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { switch (getType()) { case MachineOperand::MO_Register: OS << PrintReg(getReg(), TRI, getSubReg()); @@ -378,7 +401,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, } else if (getFPImm()->getType()->isHalfTy()) { APFloat APF = getFPImm()->getValueAPF(); bool Unused; - APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused); + APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused); OS << "half " << APF.convertToFloat(); } else { OS << getFPImm()->getValueAPF().convertToDouble(); @@ -454,12 +477,32 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, case MachineOperand::MO_CFIIndex: OS << "<call frame instruction>"; break; + case MachineOperand::MO_IntrinsicID: { + Intrinsic::ID ID = getIntrinsicID(); + if (ID < Intrinsic::num_intrinsics) + OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>'; + else if (IntrinsicInfo) + OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>'; + else + OS << "<intrinsic:" << ID << '>'; + break; + } + case MachineOperand::MO_Predicate: { + auto Pred = static_cast<CmpInst::Predicate>(getPredicate()); + OS << '<' << (CmpInst::isIntPredicate(Pred) ? 
"intpred" : "floatpred") + << CmpInst::getPredicateName(Pred) << '>'; + } } - if (unsigned TF = getTargetFlags()) OS << "[TF=" << TF << ']'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineOperand::dump() const { + dbgs() << *this << '\n'; +} +#endif + //===----------------------------------------------------------------------===// // MachineMemOperand Implementation //===----------------------------------------------------------------------===// @@ -500,7 +543,10 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, - const MDNode *Ranges) + const MDNode *Ranges, + SynchronizationScope SynchScope, + AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || @@ -508,6 +554,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); + + AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope); + assert(getSynchScope() == SynchScope && "Value truncated"); + AtomicInfo.Ordering = static_cast<unsigned>(Ordering); + assert(getOrdering() == Ordering && "Value truncated"); + AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); + assert(getFailureOrdering() == FailureOrdering && "Value truncated"); } /// Profile - Gather unique data for the object. @@ -623,10 +676,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { OS << ")"; } - // Print nontemporal info. if (isNonTemporal()) OS << "(nontemporal)"; - + if (isDereferenceable()) + OS << "(dereferenceable)"; if (isInvariant()) OS << "(invariant)"; } @@ -653,12 +706,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), - debugLoc(std::move(dl)) -#ifdef LLVM_BUILD_GLOBAL_ISEL - , - Ty(nullptr) -#endif -{ + debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -677,12 +725,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), - MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) -#ifdef LLVM_BUILD_GLOBAL_ISEL - , - Ty(nullptr) -#endif -{ + MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -705,25 +748,6 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { return nullptr; } -// Implement dummy setter and getter for type when -// global-isel is not built. -// The proper implementation is WIP and is tracked here: -// PR26576. 
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-void MachineInstr::setType(Type *Ty) {}
-
-Type *MachineInstr::getType() const { return nullptr; }
-
-#else
-void MachineInstr::setType(Type *Ty) {
-  assert((!Ty || isPreISelGenericOpcode(getOpcode())) &&
-         "Non generic instructions are not supposed to be typed");
-  this->Ty = Ty;
-}
-
-Type *MachineInstr::getType() const { return Ty; }
-#endif // LLVM_BUILD_GLOBAL_ISEL
-
 /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
 /// this instruction from their respective use lists.  This requires that the
 /// operands already be on their use lists.
@@ -976,16 +1000,24 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
     return false;

   if (isBundle()) {
-    // Both instructions are bundles, compare MIs inside the bundle.
+    // We have passed the test above that both instructions have the same
+    // opcode, so we know that both instructions are bundles here. Let's compare
+    // MIs inside the bundle.
+    assert(Other.isBundle() && "Expected that both instructions are bundles.");
     MachineBasicBlock::const_instr_iterator I1 = getIterator();
-    MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
     MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
-    MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
-    while (++I1 != E1 && I1->isInsideBundle()) {
+    // Loop until we have analyzed the last instruction inside at least one of
+    // the bundles.
+    while (I1->isBundledWithSucc() && I2->isBundledWithSucc()) {
+      ++I1;
       ++I2;
-      if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
+      if (!I1->isIdenticalTo(*I2, Check))
         return false;
     }
+    // If we've reached the end of just one of the two bundles, but not both,
+    // the instructions are not identical.
+    if (I1->isBundledWithSucc() || I2->isBundledWithSucc())
+      return false;
   }

   // Check operands to make sure they match.
@@ -1287,8 +1319,8 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
 /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
 /// the specific register or -1 if it is not found. It further tightens
 /// the search criteria to a use that kills the register if isKill is true.
-int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
-                                      const TargetRegisterInfo *TRI) const {
+int MachineInstr::findRegisterUseOperandIdx(
+    unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const {
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
     if (!MO.isReg() || !MO.isUse())
@@ -1296,11 +1328,9 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
     unsigned MOReg = MO.getReg();
     if (!MOReg)
       continue;
-    if (MOReg == Reg ||
-        (TRI &&
-         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
-         TargetRegisterInfo::isPhysicalRegister(Reg) &&
-         TRI->isSubRegister(MOReg, Reg)))
+    if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+                         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+                         TRI->isSubRegister(MOReg, Reg)))
       if (!isKill || MO.isKill())
         return i;
   }
@@ -1533,7 +1563,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
   // destination. The check for isInvariantLoad gives the target the chance to
   // classify the load as always returning a constant, e.g. a constant pool
   // load.
-  if (mayLoad() && !isInvariantLoad(AA))
+  if (mayLoad() && !isDereferenceableInvariantLoad(AA))
    // Otherwise, this is a real load. 
If there is a store between the load and // end of block, we can't move it. return !SawStore; @@ -1564,12 +1594,10 @@ bool MachineInstr::hasOrderedMemoryRef() const { }); } -/// isInvariantLoad - Return true if this instruction is loading from a -/// location whose value is invariant across the function. For example, -/// loading a value from the constant pool or from the argument area -/// of a function if it does not change. This should only return true of -/// *all* loads the instruction does are invariant (if it does multiple loads). -bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { +/// isDereferenceableInvariantLoad - Return true if this instruction will never +/// trap and is loading from a location whose value is invariant across a run of +/// this function. +bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. if (!mayLoad()) return false; @@ -1579,16 +1607,17 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if (memoperands_empty()) return false; - const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo(); for (MachineMemOperand *MMO : memoperands()) { if (MMO->isVolatile()) return false; if (MMO->isStore()) return false; - if (MMO->isInvariant()) continue; + if (MMO->isInvariant() && MMO->isDereferenceable()) + continue; // A load from a constant PseudoSourceValue is invariant. if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) - if (PSV->isConstant(MFI)) + if (PSV->isConstant(&MFI)) continue; if (const Value *V = MMO->getValue()) { @@ -1663,35 +1692,40 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, } } -LLVM_DUMP_METHOD void MachineInstr::dump() const { +LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - dbgs() << " " << *this; + dbgs() << " "; + print(dbgs(), false /* SkipOpers */, TII); #endif } -void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { +void MachineInstr::print(raw_ostream &OS, bool SkipOpers, + const TargetInstrInfo *TII) const { const Module *M = nullptr; if (const MachineBasicBlock *MBB = getParent()) if (const MachineFunction *MF = MBB->getParent()) M = MF->getFunction()->getParent(); ModuleSlotTracker MST(M); - print(OS, MST, SkipOpers); + print(OS, MST, SkipOpers, TII); } void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, - bool SkipOpers) const { + bool SkipOpers, const TargetInstrInfo *TII) const { // We can be a bit tidier if we know the MachineFunction. 
const MachineFunction *MF = nullptr; const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; - const TargetInstrInfo *TII = nullptr; + const TargetIntrinsicInfo *IntrinsicInfo = nullptr; + if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); if (MF) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); - TII = MF->getSubtarget().getInstrInfo(); + if (!TII) + TII = MF->getSubtarget().getInstrInfo(); + IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); } } @@ -1705,13 +1739,13 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, !getOperand(StartOp).isImplicit(); ++StartOp) { if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, MST, TRI); + getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo); unsigned Reg = getOperand(StartOp).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { VirtRegs.push_back(Reg); - unsigned Size; - if (MRI && (Size = MRI->getSize(Reg))) - OS << '(' << Size << ')'; + LLT Ty = MRI ? MRI->getType(Reg) : LLT{}; + if (Ty.isValid()) + OS << '(' << Ty << ')'; } } @@ -1724,12 +1758,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else OS << "UNKNOWN"; - if (getType()) { - OS << ' '; - getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true); - OS << ' '; - } - if (SkipOpers) return; @@ -1812,7 +1840,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "!\"" << DIV->getName() << '\"'; else MO.print(OS, MST, TRI); - } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + } else if (TRI && (isInsertSubreg() || isRegSequence() || + (isSubregToReg() && i == 3)) && MO.isImm()) { OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. @@ -2145,8 +2174,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // If there are no uses, including partial uses, the def is dead. 
- if (std::none_of(UsedRegs.begin(), UsedRegs.end(), - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + if (none_of(UsedRegs, + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp index e4686b3..b5621a0 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -24,7 +24,8 @@ namespace { class UnpackMachineBundles : public MachineFunctionPass { public: static char ID; // Pass identification - UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr) + UnpackMachineBundles( + std::function<bool(const MachineFunction &)> Ftor = nullptr) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); } @@ -32,7 +33,7 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; private: - std::function<bool(const Function &)> PredicateFtor; + std::function<bool(const MachineFunction &)> PredicateFtor; }; } // end anonymous namespace @@ -42,7 +43,7 @@ INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", "Unpack machine instruction bundles", false, false) bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { - if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + if (PredicateFtor && !PredicateFtor(MF)) return false; bool Changed = false; @@ -78,7 +79,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { } FunctionPass * -llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) { +llvm::createUnpackMachineBundles( + std::function<bool(const MachineFunction &)> Ftor) { return new UnpackMachineBundles(std::move(Ftor)); } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 119751b..b3d1843 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -92,8 +92,7 @@ namespace { SmallVector<MachineBasicBlock*, 8> ExitBlocks; bool isExitBlock(const MachineBasicBlock *MBB) const { - return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != - ExitBlocks.end(); + return is_contained(ExitBlocks, MBB); } // Track 'estimated' register pressure. @@ -268,7 +267,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); TLI = ST.getTargetLowering(); TRI = ST.getRegisterInfo(); - MFI = MF.getFrameInfo(); + MFI = &MF.getFrameInfo(); MRI = &MF.getRegInfo(); SchedModel.init(ST.getSchedModel(), &ST, TII); @@ -896,7 +895,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) + if (!MRI->isConstantPhysReg(Reg)) return false; // Otherwise it's safe to move. continue; @@ -1139,7 +1138,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // High register pressure situation, only hoist if the instruction is going // to be remat'ed. 
- if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) { + if (!TII->isTriviallyReMaterializable(MI, AA) && + !MI.isDereferenceableInvariantLoad(AA)) { DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } @@ -1158,7 +1158,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!MI->isInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad(AA)) return nullptr; // Next determine the register class for a temporary register. @@ -1336,6 +1336,11 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Otherwise, splice the instruction to the preheader. Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); + // Since we are moving the instruction out of its basic block, we do not + // retain its debug location. Doing so would degrade the debugging + // experience and adversely affect the accuracy of profiling information. + MI->setDebugLoc(DebugLoc()); + // Update register pressure for BBs from header to this block. UpdateBackTraceRegPressure(MI); diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 376f78f..fdeaf7b 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -77,6 +77,51 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } +MachineBasicBlock *MachineLoop::findLoopControlBlock() { + if (MachineBasicBlock *Latch = getLoopLatch()) { + if (isLoopExiting(Latch)) + return Latch; + else + return getExitingBlock(); + } + return nullptr; +} + +MachineBasicBlock * +MachineLoopInfo::findLoopPreheader(MachineLoop *L, + bool SpeculativePreheader) const { + if (MachineBasicBlock *PB = L->getLoopPreheader()) + return PB; + + if (!SpeculativePreheader) + return nullptr; + + MachineBasicBlock *HB = L->getHeader(), *LB = L->getLoopLatch(); + if (HB->pred_size() != 2 || HB->hasAddressTaken()) + return nullptr; + // Find the predecessor of the header that is not the latch block. + MachineBasicBlock *Preheader = nullptr; + for (MachineBasicBlock *P : HB->predecessors()) { + if (P == LB) + continue; + // Sanity. + if (Preheader) + return nullptr; + Preheader = P; + } + + // Check if the preheader candidate is a successor of any other loop + // headers. We want to avoid having two loop setups in the same block. 
+ for (MachineBasicBlock *S : Preheader->successors()) { + if (S == HB) + continue; + MachineLoop *T = getLoopFor(S); + if (T && T->getHeader() == S) + return nullptr; + } + return Preheader; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineLoop::dump() const { print(dbgs()); diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 244e3fb..6618857 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -9,26 +9,31 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) +INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; // Out of line virtual method. @@ -54,7 +59,7 @@ public: class MMIAddrLabelMap { MCContext &Context; struct AddrLabelSymEntry { - /// Symbols - The symbols for the label. + /// The symbols for the label. TinyPtrVector<MCSymbol *> Symbols; Function *Fn; // The containing function of the BasicBlock. @@ -63,14 +68,13 @@ class MMIAddrLabelMap { DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; - /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We - /// use this so we get notified if a block is deleted or RAUWd. + /// Callbacks for the BasicBlock's that we have entries for. We use this so + /// we get notified if a block is deleted or RAUWd. std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; - /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols - /// whose corresponding BasicBlock got deleted. These symbols need to be - /// emitted at some point in the file, so AsmPrinter emits them after the - /// function body. + /// This is a per-function list of symbols whose corresponding BasicBlock got + /// deleted. These symbols need to be emitted at some point in the file, so + /// AsmPrinter emits them after the function body. DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > DeletedAddrLabelsNeedingEmission; public: @@ -112,8 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { return Entry.Symbols; } -/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return -/// them. +/// If we have any deleted symbols for F, return them. 
void MMIAddrLabelMap:: takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I = @@ -186,20 +189,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// -MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, - const MCRegisterInfo &MRI, - const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) { +MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) + : ImmutablePass(ID), TM(*TM), + Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), + TM->getObjFileLowering(), nullptr, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } -MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) { - llvm_unreachable("This MachineModuleInfo constructor should never be called, " - "MMI should always be explicitly constructed by " - "LLVMTargetMachine"); -} - MachineModuleInfo::~MachineModuleInfo() { } @@ -207,13 +203,9 @@ bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; CurCallSite = 0; - CallsEHReturn = false; - CallsUnwindInit = false; - HasEHFunclets = false; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; - PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; - TheModule = nullptr; + TheModule = &M; return false; } @@ -233,30 +225,8 @@ bool MachineModuleInfo::doFinalization(Module &M) { return false; } -/// EndFunction - Discard function meta information. -/// -void MachineModuleInfo::EndFunction() { - // Clean up frame info. - FrameInstructions.clear(); - - // Clean up exception info. - LandingPads.clear(); - PersonalityTypeCache = EHPersonality::Unknown; - CallSiteMap.clear(); - TypeInfos.clear(); - FilterIds.clear(); - FilterEnds.clear(); - CallsEHReturn = false; - CallsUnwindInit = false; - HasEHFunclets = false; - VariableDbgInfos.clear(); -} - //===- Address of Block Management ----------------------------------------===// -/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified -/// basic block when its address is taken. If other blocks were RAUW'd to -/// this one, we may have to emit them as well, return the whole set. ArrayRef<MCSymbol *> MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. @@ -265,11 +235,6 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } - -/// takeDeletedSymbolsForFunction - If the specified function has had any -/// references to address-taken blocks generated, but the block got deleted, -/// return the symbol now so we can emit it. This prevents emitting a -/// reference to a symbol that has no definition. void MachineModuleInfo:: takeDeletedSymbolsForFunction(const Function *F, std::vector<MCSymbol*> &Result) { @@ -279,40 +244,8 @@ takeDeletedSymbolsForFunction(const Function *F, takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); } -//===- EH -----------------------------------------------------------------===// - -/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the -/// specified MachineBasicBlock. 
-LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo - (MachineBasicBlock *LandingPad) { - unsigned N = LandingPads.size(); - for (unsigned i = 0; i < N; ++i) { - LandingPadInfo &LP = LandingPads[i]; - if (LP.LandingPadBlock == LandingPad) - return LP; - } - - LandingPads.push_back(LandingPadInfo(LandingPad)); - return LandingPads[N]; -} - -/// addInvoke - Provide the begin and end labels of an invoke style call and -/// associate it with a try landing pad block. -void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, - MCSymbol *BeginLabel, MCSymbol *EndLabel) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.BeginLabels.push_back(BeginLabel); - LP.EndLabels.push_back(EndLabel); -} - -/// addLandingPad - Provide the label of a try LandingPad block. -/// -MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { - MCSymbol *LandingPadLabel = Context.createTempSymbol(); - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.LandingPadLabel = LandingPadLabel; - return LandingPadLabel; -} +/// \name Exception Handling +/// \{ void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) @@ -321,143 +254,83 @@ void MachineModuleInfo::addPersonality(const Function *Personality) { Personalities.push_back(Personality); } -/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. -/// -void MachineModuleInfo:: -addCatchTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - for (unsigned N = TyInfo.size(); N; --N) - LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); -} - -/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. -/// -void MachineModuleInfo:: -addFilterTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - std::vector<unsigned> IdsInFilter(TyInfo.size()); - for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) - IdsInFilter[I] = getTypeIDFor(TyInfo[I]); - LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); -} - -/// addCleanup - Add a cleanup action for a landing pad. -/// -void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.TypeIds.push_back(0); -} +/// \} + +MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { + // Shortcut for the common case where a sequence of MachineFunctionPasses + // all query for the same Function. + if (LastRequest == &F) + return *LastResult; + + auto I = MachineFunctions.insert( + std::make_pair(&F, std::unique_ptr<MachineFunction>())); + MachineFunction *MF; + if (I.second) { + // No pre-existing machine function, create a new one. + MF = new MachineFunction(&F, TM, NextFnNum++, *this); + // Update the set entry. 
+ I.first->second.reset(MF); + + if (MFInitializer) + if (MFInitializer->initializeMachineFunction(*MF)) + report_fatal_error("Unable to initialize machine function"); + } else { + MF = I.first->second.get(); + } -void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad, - const Function *Filter, - const BlockAddress *RecoverBA) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Filter; - Handler.RecoverBA = RecoverBA; - LP.SEHHandlers.push_back(Handler); + LastRequest = &F; + LastResult = MF; + return *MF; } -void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad, - const Function *Cleanup) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Cleanup; - Handler.RecoverBA = nullptr; - LP.SEHHandlers.push_back(Handler); +void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { + MachineFunctions.erase(&F); + LastRequest = nullptr; + LastResult = nullptr; } -/// TidyLandingPads - Remap landing pad labels and remove any deleted landing -/// pads. -void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { - for (unsigned i = 0; i != LandingPads.size(); ) { - LandingPadInfo &LandingPad = LandingPads[i]; - if (LandingPad.LandingPadLabel && - !LandingPad.LandingPadLabel->isDefined() && - (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) - LandingPad.LandingPadLabel = nullptr; - - // Special case: we *should* emit LPs with null LP MBB. This indicates - // "nounwind" case. - if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { - LandingPads.erase(LandingPads.begin() + i); - continue; - } - - for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { - MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; - MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if ((BeginLabel->isDefined() || - (LPMap && (*LPMap)[BeginLabel] != 0)) && - (EndLabel->isDefined() || - (LPMap && (*LPMap)[EndLabel] != 0))) continue; - - LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); - LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); - --j; - --e; - } - - // Remove landing pads with no try-ranges. - if (LandingPads[i].BeginLabels.empty()) { - LandingPads.erase(LandingPads.begin() + i); - continue; - } +namespace { +/// This pass frees the MachineFunction object associated with a Function. +class FreeMachineFunction : public FunctionPass { +public: + static char ID; + FreeMachineFunction() : FunctionPass(ID) {} - // If there is no landing pad, ensure that the list of typeids is empty. - // If the only typeid is a cleanup, this is the same as having no typeids. - if (!LandingPad.LandingPadBlock || - (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) - LandingPad.TypeIds.clear(); - ++i; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineModuleInfo>(); + AU.addPreserved<MachineModuleInfo>(); } -} -/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site -/// indexes. -void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, - ArrayRef<unsigned> Sites) { - LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); -} - -/// getTypeIDFor - Return the type id for the specified typeinfo. This is -/// function wide. 
-unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) { - for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) - if (TypeInfos[i] == TI) return i + 1; + bool runOnFunction(Function &F) override { + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MMI.deleteMachineFunctionFor(F); + return true; + } +}; +char FreeMachineFunction::ID; +} // end anonymous namespace - TypeInfos.push_back(TI); - return TypeInfos.size(); +namespace llvm { +FunctionPass *createFreeMachineFunctionPass() { + return new FreeMachineFunction(); } - -/// getFilterIDFor - Return the filter id for the specified typeinfos. This is -/// function wide. -int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) { - // If the new filter coincides with the tail of an existing filter, then - // re-use the existing filter. Folding filters more than this requires - // re-ordering filters and/or their elements - probably not worth it. - for (std::vector<unsigned>::iterator I = FilterEnds.begin(), - E = FilterEnds.end(); I != E; ++I) { - unsigned i = *I, j = TyIds.size(); - - while (i && j) - if (FilterIds[--i] != TyIds[--j]) - goto try_next; - - if (!j) - // The new filter coincides with range [i, end) of the existing filter. - return -(1 + i); - -try_next:; +} // end namespace llvm + +//===- MMI building helpers -----------------------------------------------===// + +void llvm::computeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo &MMI) { + FunctionType *FT = + cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI.usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type *T = I.getArgOperand(i)->getType(); + for (auto i : post_order(T)) { + if (i->isFloatingPointTy()) { + MMI.setUsesVAFloatArgument(true); + return; + } + } + } } - - // Add the new filter. - int FilterID = -(1 + FilterIds.size()); - FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); - FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); - FilterEnds.push_back(FilterIds.size()); - FilterIds.push_back(0); // terminator - return FilterID; } diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp new file mode 100644 index 0000000..43a1809 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -0,0 +1,3984 @@ +//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. +// +// Software pipelining (SWP) is an instruction scheduling technique for loops +// that overlaps loop iterations and exploits ILP via a compiler transformation. +// +// Swing Modulo Scheduling is an implementation of software pipelining +// that generates schedules that are near optimal in terms of initiation +// interval, register requirements, and stage count. See the papers: +// +// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, +// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 +// Conference on Parallel Architectures and Compilation Techniques. +// +// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. +// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt.
In IEEE +// Transactions on Computers, Vol. 50, No. 3, 2001. +// +// "An Implementation of Swing Modulo Scheduling With Extensions for +// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at +// Urbana-Champaign, 2005. +// +// +// The SMS algorithm consists of three main steps after computing the minimal +// initiation interval (MII). +// 1) Analyze the dependence graph and compute information about each +// instruction in the graph. +// 2) Order the nodes (instructions) by priority based upon the heuristics +// described in the algorithm. +// 3) Attempt to schedule the nodes in the specified order using the MII. +// +// This SMS implementation is a target-independent back-end pass. When enabled, +// the pass runs just prior to the register allocation pass, while the machine +// IR is in SSA form. If software pipelining is successful, then the original +// loop is replaced by the optimized loop. The optimized loop contains one or +// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If +// the instructions cannot be scheduled in a given MII, we increase the MII by +// one and try again. +// +// The SMS implementation is an extension of the ScheduleDAGInstrs class. We +// represent loop carried dependences in the DAG as order edges to the Phi +// nodes. We also perform several passes over the DAG to eliminate unnecessary +// edges that inhibit the ability to pipeline. The implementation uses the +// DFAPacketizer class to compute the minimum initiation interval and to check +// where an instruction may be inserted in the pipelined schedule. +// +// In order for the SMS pass to work, several target specific hooks need to be +// implemented to get information about the loop structure and to rewrite +// instructions.
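To make the prolog/kernel/epilog shape described in the header above concrete, here is a brief editorial sketch (not part of the LLVM sources; the loop body and names are invented for illustration) of what the transformation produces for a two-stage schedule:
// Source loop, one load and one dependent store per iteration:
//   for (int i = 0; i < n; ++i) { int a = load(i); store(i, f(a)); }
// Pipelined form with two stages (stage 0 = load, stage 1 = store):
//   int a = load(0);                    // prolog: stage 0 of iteration 0
//   for (int i = 0; i < n - 1; ++i) {   // kernel: two iterations in flight
//     store(i, f(a));                   //   stage 1 of iteration i
//     a = load(i + 1);                  //   stage 0 of iteration i + 1
//   }
//   store(n - 1, f(a));                 // epilog: drain the last iteration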
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstdint> +#include <deque> +#include <functional> +#include <iterator> +#include <map> +#include <tuple> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "pipeliner" + +STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline"); +STATISTIC(NumPipelined, "Number of loops software pipelined"); + +/// A command line option to turn software pipelining on or off. +static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), + cl::ZeroOrMore, + cl::desc("Enable Software Pipelining")); + +/// A command line option to enable SWP at -Os. +static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size", + cl::desc("Enable SWP at Os."), cl::Hidden, + cl::init(false)); + +/// A command line argument to limit minimum initial interval for pipelining. +static cl::opt<int> SwpMaxMii("pipeliner-max-mii", + cl::desc("Size limit for the the MII."), + cl::Hidden, cl::init(27)); + +/// A command line argument to limit the number of stages in the pipeline. +static cl::opt<int> + SwpMaxStages("pipeliner-max-stages", + cl::desc("Maximum stages allowed in the generated scheduled."), + cl::Hidden, cl::init(3)); + +/// A command line option to disable the pruning of chain dependences due to +/// an unrelated Phi. +static cl::opt<bool> + SwpPruneDeps("pipeliner-prune-deps", + cl::desc("Prune dependences between unrelated Phi nodes."), + cl::Hidden, cl::init(true)); + +/// A command line option to disable the pruning of loop carried order +/// dependences. 
+static cl::opt<bool> + SwpPruneLoopCarried("pipeliner-prune-loop-carried", + cl::desc("Prune loop carried order dependences."), + cl::Hidden, cl::init(true)); + +#ifndef NDEBUG +static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1)); +#endif + +static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii", + cl::ReallyHidden, cl::init(false), + cl::ZeroOrMore, cl::desc("Ignore RecMII")); + +namespace { + +class NodeSet; +class SMSchedule; +class SwingSchedulerDAG; + +/// The main class in the implementation of the target independent +/// software pipeliner pass. +class MachinePipeliner : public MachineFunctionPass { +public: + MachineFunction *MF = nullptr; + const MachineLoopInfo *MLI = nullptr; + const MachineDominatorTree *MDT = nullptr; + const InstrItineraryData *InstrItins; + const TargetInstrInfo *TII = nullptr; + RegisterClassInfo RegClassInfo; + +#ifndef NDEBUG + static int NumTries; +#endif + /// Cache the target analysis information about the loop. + struct LoopInfo { + MachineBasicBlock *TBB = nullptr; + MachineBasicBlock *FBB = nullptr; + SmallVector<MachineOperand, 4> BrCond; + MachineInstr *LoopInductionVar = nullptr; + MachineInstr *LoopCompare = nullptr; + }; + LoopInfo LI; + + static char ID; + MachinePipeliner() : MachineFunctionPass(ID) { + initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool canPipelineLoop(MachineLoop &L); + bool scheduleLoop(MachineLoop &L); + bool swingModuloScheduler(MachineLoop &L); +}; + +/// This class builds the dependence graph for the instructions in a loop, +/// and attempts to schedule the instructions using the SMS algorithm. +class SwingSchedulerDAG : public ScheduleDAGInstrs { + MachinePipeliner &Pass; + /// The minimum initiation interval between iterations for this schedule. + unsigned MII; + /// Set to true if a valid pipelined schedule is found for the loop. + bool Scheduled; + MachineLoop &Loop; + LiveIntervals &LIS; + const RegisterClassInfo &RegClassInfo; + + /// A toplogical ordering of the SUnits, which is needed for changing + /// dependences and iterating over the SUnits. + ScheduleDAGTopologicalSort Topo; + + struct NodeInfo { + int ASAP; + int ALAP; + NodeInfo() : ASAP(0), ALAP(0) {} + }; + /// Computed properties for each node in the graph. + std::vector<NodeInfo> ScheduleInfo; + + enum OrderKind { BottomUp = 0, TopDown = 1 }; + /// Computed node ordering for scheduling. + SetVector<SUnit *> NodeOrder; + + typedef SmallVector<NodeSet, 8> NodeSetType; + typedef DenseMap<unsigned, unsigned> ValueMapTy; + typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy; + typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy; + + /// Instructions to change when emitting the final schedule. + DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; + + /// We may create a new instruction, so remember it because it + /// must be deleted when the pass is finished. + SmallPtrSet<MachineInstr *, 4> NewMIs; + + /// Ordered list of DAG postprocessing steps. + std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; + + /// Helper class to implement Johnson's circuit finding algorithm. 
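An editorial gloss on the Circuits helper declared just below, using the vocabulary of Johnson's elementary-circuits algorithm (this mapping of the fields is an interpretation, not taken from the source comments):
// Stack   - the current path of SUnits being extended into a candidate circuit.
// Blocked - per-node flag; a blocked node is not revisited from the current
//           path until it is explicitly unblocked.
// B       - Johnson's B-lists: B[W] records the nodes to unblock transitively
//           once W itself becomes unblocked (see unblock()).
// AdjK    - adjacency lists keyed by NodeNum, filled by createAdjacencyStructure().
// NumPaths / MaxPaths - a cap on enumerated paths to bound compile time.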
+ class Circuits { + std::vector<SUnit> &SUnits; + SetVector<SUnit *> Stack; + BitVector Blocked; + SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; + SmallVector<SmallVector<int, 4>, 16> AdjK; + unsigned NumPaths; + static unsigned MaxPaths; + + public: + Circuits(std::vector<SUnit> &SUs) + : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()), + AdjK(SUs.size()) {} + /// Reset the data structures used in the circuit algorithm. + void reset() { + Stack.clear(); + Blocked.reset(); + B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); + NumPaths = 0; + } + void createAdjacencyStructure(SwingSchedulerDAG *DAG); + bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); + void unblock(int U); + }; + +public: + SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, + const RegisterClassInfo &rci) + : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0), + Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci), + Topo(SUnits, &ExitSU) { + P.MF->getSubtarget().getSMSMutations(Mutations); + } + + void schedule() override; + void finishBlock() override; + + /// Return true if the loop kernel has been scheduled. + bool hasNewSchedule() { return Scheduled; } + + /// Return the earliest time an instruction may be scheduled. + int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } + + /// Return the latest time an instruction my be scheduled. + int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } + + /// The mobility function, which the the number of slots in which + /// an instruction may be scheduled. + int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } + + /// The depth, in the dependence graph, for a node. + int getDepth(SUnit *Node) { return Node->getDepth(); } + + /// The height, in the dependence graph, for a node. + int getHeight(SUnit *Node) { return Node->getHeight(); } + + /// Return true if the dependence is a back-edge in the data dependence graph. + /// Since the DAG doesn't contain cycles, we represent a cycle in the graph + /// using an anti dependence from a Phi to an instruction. + bool isBackedge(SUnit *Source, const SDep &Dep) { + if (Dep.getKind() != SDep::Anti) + return false; + return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); + } + + /// Return true if the dependence is an order dependence between non-Phis. + static bool isOrder(SUnit *Source, const SDep &Dep) { + if (Dep.getKind() != SDep::Order) + return false; + return (!Source->getInstr()->isPHI() && + !Dep.getSUnit()->getInstr()->isPHI()); + } + + bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true); + + /// The latency of the dependence. + unsigned getLatency(SUnit *Source, const SDep &Dep) { + // Anti dependences represent recurrences, so use the latency of the + // instruction on the back-edge. + if (Dep.getKind() == SDep::Anti) { + if (Source->getInstr()->isPHI()) + return Dep.getSUnit()->Latency; + if (Dep.getSUnit()->getInstr()->isPHI()) + return Source->Latency; + return Dep.getLatency(); + } + return Dep.getLatency(); + } + + /// The distance function, which indicates that operation V of iteration I + /// depends on operations U of iteration I-distance. + unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { + // Instructions that feed a Phi have a distance of 1. Computing larger + // values for arrays requires data dependence information. 
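// Editorial example (not from the original source): in a scalar recurrence such
// as s = s + x, the value computed by the add in iteration i is consumed, via
// the loop-carried PHI, in iteration i + 1; the PHI-related anti edge handled
// below therefore reports a distance of 1, while edges within a single
// iteration report 0.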
+ if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) + return 1; + return 0; + } + + /// Set the Minimum Initiation Interval for this schedule attempt. + void setMII(unsigned mii) { MII = mii; } + + MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule, + bool UpdateDAG = false); + + /// Return the new base register that was stored away for the changed + /// instruction. + unsigned getInstrBaseReg(SUnit *SU) { + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(SU); + if (It != InstrChanges.end()) + return It->second.first; + return 0; + } + + void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { + Mutations.push_back(std::move(Mutation)); + } + +private: + void addLoopCarriedDependences(AliasAnalysis *AA); + void updatePhiDependences(); + void changeDependences(); + unsigned calculateResMII(); + unsigned calculateRecMII(NodeSetType &RecNodeSets); + void findCircuits(NodeSetType &NodeSets); + void fuseRecs(NodeSetType &NodeSets); + void removeDuplicateNodes(NodeSetType &NodeSets); + void computeNodeFunctions(NodeSetType &NodeSets); + void registerPressureFilter(NodeSetType &NodeSets); + void colocateNodeSets(NodeSetType &NodeSets); + void checkNodeSets(NodeSetType &NodeSets); + void groupRemainingNodes(NodeSetType &NodeSets); + void addConnectedNodes(SUnit *SU, NodeSet &NewSet, + SetVector<SUnit *> &NodesAdded); + void computeNodeOrder(NodeSetType &NodeSets); + bool schedulePipeline(SMSchedule &Schedule); + void generatePipelinedLoop(SMSchedule &Schedule); + void generateProlog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, + MBBVectorTy &PrologBBs); + void generateEpilog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); + void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, + unsigned CurStageNum, bool IsLast); + void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, + unsigned CurStageNum, bool IsLast); + void removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs); + void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, + SMSchedule &Schedule); + void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, SMSchedule &Schedule, + ValueMapTy *VRMap); + bool computeDelta(MachineInstr &MI, unsigned &Delta); + void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, + unsigned Num); + MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum, + SMSchedule &Schedule); + void updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, unsigned InstStageNum, + SMSchedule &Schedule, ValueMapTy *VRMap); + MachineInstr *findDefInLoop(unsigned Reg); + unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, + unsigned LoopStage, ValueMapTy *VRMap, + MachineBasicBlock *BB); + void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, + SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap); + void 
rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, + InstrMapTy &InstrMap, unsigned CurStageNum, + unsigned PhiNum, MachineInstr *Phi, + unsigned OldReg, unsigned NewReg, + unsigned PrevReg = 0); + bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, + unsigned &OffsetPos, unsigned &NewBase, + int64_t &NewOffset); + void postprocessDAG(); +}; + +/// A NodeSet contains a set of SUnit DAG nodes with additional information +/// that assigns a priority to the set. +class NodeSet { + SetVector<SUnit *> Nodes; + bool HasRecurrence; + unsigned RecMII = 0; + int MaxMOV = 0; + int MaxDepth = 0; + unsigned Colocate = 0; + SUnit *ExceedPressure = nullptr; + +public: + typedef SetVector<SUnit *>::const_iterator iterator; + + NodeSet() : Nodes(), HasRecurrence(false) {} + + NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {} + + bool insert(SUnit *SU) { return Nodes.insert(SU); } + + void insert(iterator S, iterator E) { Nodes.insert(S, E); } + + template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { + return Nodes.remove_if(P); + } + + unsigned count(SUnit *SU) const { return Nodes.count(SU); } + + bool hasRecurrence() { return HasRecurrence; }; + + unsigned size() const { return Nodes.size(); } + + bool empty() const { return Nodes.empty(); } + + SUnit *getNode(unsigned i) const { return Nodes[i]; }; + + void setRecMII(unsigned mii) { RecMII = mii; }; + + void setColocate(unsigned c) { Colocate = c; }; + + void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } + + bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } + + int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } + + int getRecMII() { return RecMII; } + + /// Summarize node functions for the entire node set. + void computeNodeSetInfo(SwingSchedulerDAG *SSD) { + for (SUnit *SU : *this) { + MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); + MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); + } + } + + void clear() { + Nodes.clear(); + RecMII = 0; + HasRecurrence = false; + MaxMOV = 0; + MaxDepth = 0; + Colocate = 0; + ExceedPressure = nullptr; + } + + operator SetVector<SUnit *> &() { return Nodes; } + + /// Sort the node sets by importance. First, rank them by recurrence MII, + /// then by mobility (least mobile done first), and finally by depth. + /// Each node set may contain a colocate value which is used as the first + /// tie breaker, if it's set. + bool operator>(const NodeSet &RHS) const { + if (RecMII == RHS.RecMII) { + if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) + return Colocate < RHS.Colocate; + if (MaxMOV == RHS.MaxMOV) + return MaxDepth > RHS.MaxDepth; + return MaxMOV < RHS.MaxMOV; + } + return RecMII > RHS.RecMII; + } + + bool operator==(const NodeSet &RHS) const { + return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && + MaxDepth == RHS.MaxDepth; + } + + bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } + + iterator begin() { return Nodes.begin(); } + iterator end() { return Nodes.end(); } + + void print(raw_ostream &os) const { + os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV + << " depth " << MaxDepth << " col " << Colocate << "\n"; + for (const auto &I : Nodes) + os << " SU(" << I->NodeNum << ") " << *(I->getInstr()); + os << "\n"; + } + + void dump() const { print(dbgs()); } +}; + +/// This class repesents the scheduled code. The main data structure is a +/// map from scheduled cycle to instructions. 
During scheduling, the +/// data structure explicitly represents all stages/iterations. When +/// the algorithm finshes, the schedule is collapsed into a single stage, +/// which represents instructions from different loop iterations. +/// +/// The SMS algorithm allows negative values for cycles, so the first cycle +/// in the schedule is the smallest cycle value. +class SMSchedule { +private: + /// Map from execution cycle to instructions. + DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; + + /// Map from instruction to execution cycle. + std::map<SUnit *, int> InstrToCycle; + + /// Map for each register and the max difference between its uses and def. + /// The first element in the pair is the max difference in stages. The + /// second is true if the register defines a Phi value and loop value is + /// scheduled before the Phi. + std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff; + + /// Keep track of the first cycle value in the schedule. It starts + /// as zero, but the algorithm allows negative values. + int FirstCycle; + + /// Keep track of the last cycle value in the schedule. + int LastCycle; + + /// The initiation interval (II) for the schedule. + int InitiationInterval; + + /// Target machine information. + const TargetSubtargetInfo &ST; + + /// Virtual register information. + MachineRegisterInfo &MRI; + + DFAPacketizer *Resources; + +public: + SMSchedule(MachineFunction *mf) + : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), + Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) { + FirstCycle = 0; + LastCycle = 0; + InitiationInterval = 0; + } + + ~SMSchedule() { + ScheduledInstrs.clear(); + InstrToCycle.clear(); + RegToStageDiff.clear(); + delete Resources; + } + + void reset() { + ScheduledInstrs.clear(); + InstrToCycle.clear(); + RegToStageDiff.clear(); + FirstCycle = 0; + LastCycle = 0; + InitiationInterval = 0; + } + + /// Set the initiation interval for this schedule. + void setInitiationInterval(int ii) { InitiationInterval = ii; } + + /// Return the first cycle in the completed schedule. This + /// can be a negative value. + int getFirstCycle() const { return FirstCycle; } + + /// Return the last cycle in the finalized schedule. + int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } + + /// Return the cycle of the earliest scheduled instruction in the dependence + /// chain. + int earliestCycleInChain(const SDep &Dep); + + /// Return the cycle of the latest scheduled instruction in the dependence + /// chain. + int latestCycleInChain(const SDep &Dep); + + void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, + int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); + bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); + + /// Iterators for the cycle to instruction map. + typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator; + typedef DenseMap<int, std::deque<SUnit *>>::const_iterator + const_sched_iterator; + + /// Return true if the instruction is scheduled at the specified stage. + bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { + return (stageScheduled(SU) == (int)StageNum); + } + + /// Return the stage for a scheduled instruction. Return -1 if + /// the instruction has not been scheduled. + int stageScheduled(SUnit *SU) const { + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); + if (it == InstrToCycle.end()) + return -1; + return (it->second - FirstCycle) / InitiationInterval; + } + + /// Return the cycle for a scheduled instruction. 
This function normalizes + /// the first cycle to be 0. + unsigned cycleScheduled(SUnit *SU) const { + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); + assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); + return (it->second - FirstCycle) % InitiationInterval; + } + + /// Return the maximum stage count needed for this schedule. + unsigned getMaxStageCount() { + return (LastCycle - FirstCycle) / InitiationInterval; + } + + /// Return the max. number of stages/iterations that can occur between a + /// register definition and its uses. + unsigned getStagesForReg(int Reg, unsigned CurStage) { + std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; + if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) + return 1; + return Stages.first; + } + + /// The number of stages for a Phi is a little different than other + /// instructions. The minimum value computed in RegToStageDiff is 1 + /// because we assume the Phi is needed for at least 1 iteration. + /// This is not the case if the loop value is scheduled prior to the + /// Phi in the same stage. This function returns the number of stages + /// or iterations needed between the Phi definition and any uses. + unsigned getStagesForPhi(int Reg) { + std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; + if (Stages.second) + return Stages.first; + return Stages.first - 1; + } + + /// Return the instructions that are scheduled at the specified cycle. + std::deque<SUnit *> &getInstructions(int cycle) { + return ScheduledInstrs[cycle]; + } + + bool isValidSchedule(SwingSchedulerDAG *SSD); + void finalizeSchedule(SwingSchedulerDAG *SSD); + bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, + std::deque<SUnit *> &Insts); + bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); + bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst, + MachineOperand &MO); + void print(raw_ostream &os) const; + void dump() const; +}; + +} // end anonymous namespace + +unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; +char MachinePipeliner::ID = 0; +#ifndef NDEBUG +int MachinePipeliner::NumTries = 0; +#endif +char &llvm::MachinePipelinerID = MachinePipeliner::ID; +INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner", + "Modulo Software Pipelining", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachinePipeliner, "pipeliner", + "Modulo Software Pipelining", false, false) + +/// The "main" function for implementing Swing Modulo Scheduling. +bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(*mf.getFunction())) + return false; + + if (!EnableSWP) + return false; + + if (mf.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && + !EnableSWPOptSize.getPosition()) + return false; + + MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + TII = MF->getSubtarget().getInstrInfo(); + RegClassInfo.runOnMachineFunction(*MF); + + for (auto &L : *MLI) + scheduleLoop(*L); + + return false; +} + +/// Attempt to perform the SMS algorithm on the specified loop. This function is +/// the main entry point for the algorithm. The function identifies candidate +/// loops, calculates the minimum initiation interval, and attempts to schedule +/// the loop. 
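A hedged numeric illustration of the stage/cycle bookkeeping in SMSchedule above (all values invented for the example):
// With FirstCycle = -2 and InitiationInterval = 3, an instruction placed at
// cycle 4 satisfies:
//   stageScheduled  = (4 - (-2)) / 3 = 2   // it runs in the third stage
//   cycleScheduled  = (4 - (-2)) % 3 = 0   // first slot within the kernel
//   getFinalCycle() = -2 + 3 - 1 = 0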
+bool MachinePipeliner::scheduleLoop(MachineLoop &L) { + bool Changed = false; + for (auto &InnerLoop : L) + Changed |= scheduleLoop(*InnerLoop); + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = SwpLoopLimit; + if (Limit >= 0) { + if (NumTries >= SwpLoopLimit) + return Changed; + NumTries++; + } +#endif + + if (!canPipelineLoop(L)) + return Changed; + + ++NumTrytoPipeline; + + Changed = swingModuloScheduler(L); + + return Changed; +} + +/// Return true if the loop can be software pipelined. The algorithm is +/// restricted to loops with a single basic block. Make sure that the +/// branch in the loop can be analyzed. +bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { + if (L.getNumBlocks() != 1) + return false; + + // Check if the branch can't be understood because we can't do pipelining + // if that's the case. + LI.TBB = nullptr; + LI.FBB = nullptr; + LI.BrCond.clear(); + if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) + return false; + + LI.LoopInductionVar = nullptr; + LI.LoopCompare = nullptr; + if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) + return false; + + if (!L.getLoopPreheader()) + return false; + + // If any of the Phis contain subregs, then we can't pipeline + // because we don't know how to maintain subreg information in the + // VMap structure. + MachineBasicBlock *MBB = L.getHeader(); + for (MachineBasicBlock::iterator BBI = MBB->instr_begin(), + BBE = MBB->getFirstNonPHI(); + BBI != BBE; ++BBI) + for (unsigned i = 1; i != BBI->getNumOperands(); i += 2) + if (BBI->getOperand(i).getSubReg() != 0) + return false; + + return true; +} + +/// The SMS algorithm consists of the following main steps: +/// 1. Computation and analysis of the dependence graph. +/// 2. Ordering of the nodes (instructions). +/// 3. Attempt to Schedule the loop. +bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { + assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); + + SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo); + + MachineBasicBlock *MBB = L.getHeader(); + // The kernel should not include any terminator instructions. These + // will be added back later. + SMS.startBlock(MBB); + + // Compute the number of 'real' instructions in the basic block by + // ignoring terminators. + unsigned size = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(), + E = MBB->instr_end(); + I != E; ++I, --size) + ; + + SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size); + SMS.schedule(); + SMS.exitRegion(); + + SMS.finishBlock(); + return SMS.hasNewSchedule(); +} + +/// We override the schedule function in ScheduleDAGInstrs to implement the +/// scheduling part of the Swing Modulo Scheduling algorithm. +void SwingSchedulerDAG::schedule() { + AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults(); + buildSchedGraph(AA); + addLoopCarriedDependences(AA); + updatePhiDependences(); + Topo.InitDAGTopologicalSorting(); + postprocessDAG(); + changeDependences(); + DEBUG({ + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this); + }); + + NodeSetType NodeSets; + findCircuits(NodeSets); + + // Calculate the MII. + unsigned ResMII = calculateResMII(); + unsigned RecMII = calculateRecMII(NodeSets); + + fuseRecs(NodeSets); + + // This flag is used for testing and can cause correctness problems. 
+ if (SwpIgnoreRecMII) + RecMII = 0; + + MII = std::max(ResMII, RecMII); + DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII + << ")\n"); + + // Can't schedule a loop without a valid MII. + if (MII == 0) + return; + + // Don't pipeline large loops. + if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) + return; + + computeNodeFunctions(NodeSets); + + registerPressureFilter(NodeSets); + + colocateNodeSets(NodeSets); + + checkNodeSets(NodeSets); + + DEBUG({ + for (auto &I : NodeSets) { + dbgs() << " Rec NodeSet "; + I.dump(); + } + }); + + std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>()); + + groupRemainingNodes(NodeSets); + + removeDuplicateNodes(NodeSets); + + DEBUG({ + for (auto &I : NodeSets) { + dbgs() << " NodeSet "; + I.dump(); + } + }); + + computeNodeOrder(NodeSets); + + SMSchedule Schedule(Pass.MF); + Scheduled = schedulePipeline(Schedule); + + if (!Scheduled) + return; + + unsigned numStages = Schedule.getMaxStageCount(); + // No need to generate pipeline if there are no overlapped iterations. + if (numStages == 0) + return; + + // Check that the maximum stage count is less than user-defined limit. + if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) + return; + + generatePipelinedLoop(Schedule); + ++NumPipelined; +} + +/// Clean up after the software pipeliner runs. +void SwingSchedulerDAG::finishBlock() { + for (MachineInstr *I : NewMIs) + MF.DeleteMachineInstr(I); + NewMIs.clear(); + + // Call the superclass. + ScheduleDAGInstrs::finishBlock(); +} + +/// Return the register values for the operands of a Phi instruction. +/// This function assume the instruction is a Phi. +static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, + unsigned &InitVal, unsigned &LoopVal) { + assert(Phi.isPHI() && "Expecting a Phi."); + + InitVal = 0; + LoopVal = 0; + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != Loop) + InitVal = Phi.getOperand(i).getReg(); + else if (Phi.getOperand(i + 1).getMBB() == Loop) + LoopVal = Phi.getOperand(i).getReg(); + + assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); +} + +/// Return the Phi register value that comes from the incoming block. +static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return the Phi register value that comes the the loop block. +static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() == LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return true if SUb can be reached from SUa following the chain edges. +static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { + SmallPtrSet<SUnit *, 8> Visited; + SmallVector<SUnit *, 8> Worklist; + Worklist.push_back(SUa); + while (!Worklist.empty()) { + const SUnit *SU = Worklist.pop_back_val(); + for (auto &SI : SU->Succs) { + SUnit *SuccSU = SI.getSUnit(); + if (SI.getKind() == SDep::Order) { + if (Visited.count(SuccSU)) + continue; + if (SuccSU == SUb) + return true; + Worklist.push_back(SuccSU); + Visited.insert(SuccSU); + } + } + } + return false; +} + +/// Return true if the instruction causes a chain between memory +/// references before and after it. 
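A reference note for the getPhiRegs/getInitPhiReg/getLoopPhiReg helpers above: a machine-level PHI lists (value, predecessor block) operand pairs after its destination operand, which is why those loops step by two. A sketch in MIR-like syntax (register and block names invented):
// %v2 = PHI %v0, %bb.preheader, %v5, %bb.loop
//   getInitPhiReg() -> %v0   // value flowing in from outside the loop
//   getLoopPhiReg() -> %v5   // loop-carried value defined inside the loop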
+static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { + return MI.isCall() || MI.hasUnmodeledSideEffects() || + (MI.hasOrderedMemoryRef() && + (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); +} + +/// Return the underlying objects for the memory references of an instruction. +/// This function calls the code in ValueTracking, but first checks that the +/// instruction has a memory operand. +static void getUnderlyingObjects(MachineInstr *MI, + SmallVectorImpl<Value *> &Objs, + const DataLayout &DL) { + if (!MI->hasOneMemOperand()) + return; + MachineMemOperand *MM = *MI->memoperands_begin(); + if (!MM->getValue()) + return; + GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL); +} + +/// Add a chain edge between a load and store if the store can be an +/// alias of the load on a subsequent iteration, i.e., a loop carried +/// dependence. This code is very similar to the code in ScheduleDAGInstrs +/// but that code doesn't create loop carried dependences. +void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { + MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads; + for (auto &SU : SUnits) { + MachineInstr &MI = *SU.getInstr(); + if (isDependenceBarrier(MI, AA)) + PendingLoads.clear(); + else if (MI.mayLoad()) { + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + for (auto V : Objs) { + SmallVector<SUnit *, 4> &SUs = PendingLoads[V]; + SUs.push_back(&SU); + } + } else if (MI.mayStore()) { + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + for (auto V : Objs) { + MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I = + PendingLoads.find(V); + if (I == PendingLoads.end()) + continue; + for (auto Load : I->second) { + if (isSuccOrder(Load, &SU)) + continue; + MachineInstr &LdMI = *Load->getInstr(); + // First, perform the cheaper check that compares the base register. + // If they are the same and the load offset is less than the store + // offset, then mark the dependence as loop carried potentially. + unsigned BaseReg1, BaseReg2; + int64_t Offset1, Offset2; + if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) || + !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + "What happened to the chain edge?"); + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + // Second, the more expensive check that uses alias analysis on the + // base registers. If they alias, and the load offset is less than + // the store offset, the mark the dependence as loop carried. 
+ if (!AA) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + MachineMemOperand *MMO1 = *LdMI.memoperands_begin(); + MachineMemOperand *MMO2 = *MI.memoperands_begin(); + if (!MMO1->getValue() || !MMO2->getValue()) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + if (MMO1->getValue() == MMO2->getValue() && + MMO1->getOffset() <= MMO2->getOffset()) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + AliasResult AAResult = AA->alias( + MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize, + MMO1->getAAInfo()), + MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize, + MMO2->getAAInfo())); + + if (AAResult != NoAlias) + SU.addPred(SDep(Load, SDep::Barrier)); + } + } + } + } +} + +/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer +/// processes dependences for PHIs. This function adds true dependences +/// from a PHI to a use, and a loop carried dependence from the use to the +/// PHI. The loop carried dependence is represented as an anti dependence +/// edge. This function also removes chain dependences between unrelated +/// PHIs. +void SwingSchedulerDAG::updatePhiDependences() { + SmallVector<SDep, 4> RemoveDeps; + const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>(); + + // Iterate over each DAG node. + for (SUnit &I : SUnits) { + RemoveDeps.clear(); + // Set to true if the instruction has an operand defined by a Phi. + unsigned HasPhiUse = 0; + unsigned HasPhiDef = 0; + MachineInstr *MI = I.getInstr(); + // Iterate over each operand, and we process the definitions. + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (MOI->isDef()) { + // If the register is used by a Phi, then create an anti dependence. + for (MachineRegisterInfo::use_instr_iterator + UI = MRI.use_instr_begin(Reg), + UE = MRI.use_instr_end(); + UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + SUnit *SU = getSUnit(UseMI); + if (SU != nullptr && UseMI->isPHI()) { + if (!MI->isPHI()) { + SDep Dep(SU, SDep::Anti, Reg); + I.addPred(Dep); + } else { + HasPhiDef = Reg; + // Add a chain edge to a dependent Phi that isn't an existing + // predecessor. + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) + I.addPred(SDep(SU, SDep::Barrier)); + } + } + } + } else if (MOI->isUse()) { + // If the register is defined by a Phi, then create a true dependence. + MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg); + if (DefMI == nullptr) + continue; + SUnit *SU = getSUnit(DefMI); + if (SU != nullptr && DefMI->isPHI()) { + if (!MI->isPHI()) { + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(0); + ST.adjustSchedDependency(SU, &I, Dep); + I.addPred(Dep); + } else { + HasPhiUse = Reg; + // Add a chain edge to a dependent Phi that isn't an existing + // predecessor. + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) + I.addPred(SDep(SU, SDep::Barrier)); + } + } + } + } + // Remove order dependences from an unrelated Phi. 
+ if (!SwpPruneDeps) + continue; + for (auto &PI : I.Preds) { + MachineInstr *PMI = PI.getSUnit()->getInstr(); + if (PMI->isPHI() && PI.getKind() == SDep::Order) { + if (I.getInstr()->isPHI()) { + if (PMI->getOperand(0).getReg() == HasPhiUse) + continue; + if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef) + continue; + } + RemoveDeps.push_back(PI); + } + } + for (int i = 0, e = RemoveDeps.size(); i != e; ++i) + I.removePred(RemoveDeps[i]); + } +} + +/// Iterate over each DAG node and see if we can change any dependences +/// in order to reduce the recurrence MII. +void SwingSchedulerDAG::changeDependences() { + // See if an instruction can use a value from the previous iteration. + // If so, we update the base and offset of the instruction and change + // the dependences. + for (SUnit &I : SUnits) { + unsigned BasePos = 0, OffsetPos = 0, NewBase = 0; + int64_t NewOffset = 0; + if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase, + NewOffset)) + continue; + + // Get the MI and SUnit for the instruction that defines the original base. + unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg(); + MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase); + if (!DefMI) + continue; + SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + // Get the MI and SUnit for the instruction that defins the new base. + MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase); + if (!LastMI) + continue; + SUnit *LastSU = getSUnit(LastMI); + if (!LastSU) + continue; + + if (Topo.IsReachable(&I, LastSU)) + continue; + + // Remove the dependence. The value now depends on a prior iteration. + SmallVector<SDep, 4> Deps; + for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E; + ++P) + if (P->getSUnit() == DefSU) + Deps.push_back(*P); + for (int i = 0, e = Deps.size(); i != e; i++) { + Topo.RemovePred(&I, Deps[i].getSUnit()); + I.removePred(Deps[i]); + } + // Remove the chain dependence between the instructions. + Deps.clear(); + for (auto &P : LastSU->Preds) + if (P.getSUnit() == &I && P.getKind() == SDep::Order) + Deps.push_back(P); + for (int i = 0, e = Deps.size(); i != e; i++) { + Topo.RemovePred(LastSU, Deps[i].getSUnit()); + LastSU->removePred(Deps[i]); + } + + // Add a dependence between the new instruction and the instruction + // that defines the new base. + SDep Dep(&I, SDep::Anti, NewBase); + LastSU->addPred(Dep); + + // Remember the base and offset information so that we can update the + // instruction during code generation. + InstrChanges[&I] = std::make_pair(NewBase, NewOffset); + } +} + +namespace { + +// FuncUnitSorter - Comparison operator used to sort instructions by +// the number of functional unit choices. +struct FuncUnitSorter { + const InstrItineraryData *InstrItins; + DenseMap<unsigned, unsigned> Resources; + + // Compute the number of functional unit alternatives needed + // at each stage, and take the minimum value. We prioritize the + // instructions by the least number of choices first. + unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const { + unsigned schedClass = Inst->getDesc().getSchedClass(); + unsigned min = UINT_MAX; + for (const InstrStage *IS = InstrItins->beginStage(schedClass), + *IE = InstrItins->endStage(schedClass); + IS != IE; ++IS) { + unsigned funcUnits = IS->getUnits(); + unsigned numAlternatives = countPopulation(funcUnits); + if (numAlternatives < min) { + min = numAlternatives; + F = funcUnits; + } + } + return min; + } + + // Compute the critical resources needed by the instruction. 
This + // function records the functional units needed by instructions that + // must use only one functional unit. We use this as a tie breaker + // for computing the resource MII. The instrutions that require + // the same, highly used, functional unit have high priority. + void calcCriticalResources(MachineInstr &MI) { + unsigned SchedClass = MI.getDesc().getSchedClass(); + for (const InstrStage *IS = InstrItins->beginStage(SchedClass), + *IE = InstrItins->endStage(SchedClass); + IS != IE; ++IS) { + unsigned FuncUnits = IS->getUnits(); + if (countPopulation(FuncUnits) == 1) + Resources[FuncUnits]++; + } + } + + FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {} + /// Return true if IS1 has less priority than IS2. + bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const { + unsigned F1 = 0, F2 = 0; + unsigned MFUs1 = minFuncUnits(IS1, F1); + unsigned MFUs2 = minFuncUnits(IS2, F2); + if (MFUs1 == 1 && MFUs2 == 1) + return Resources.lookup(F1) < Resources.lookup(F2); + return MFUs1 > MFUs2; + } +}; + +} // end anonymous namespace + +/// Calculate the resource constrained minimum initiation interval for the +/// specified loop. We use the DFA to model the resources needed for +/// each instruction, and we ignore dependences. A different DFA is created +/// for each cycle that is required. When adding a new instruction, we attempt +/// to add it to each existing DFA, until a legal space is found. If the +/// instruction cannot be reserved in an existing DFA, we create a new one. +unsigned SwingSchedulerDAG::calculateResMII() { + SmallVector<DFAPacketizer *, 8> Resources; + MachineBasicBlock *MBB = Loop.getHeader(); + Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget())); + + // Sort the instructions by the number of available choices for scheduling, + // least to most. Use the number of critical resources as the tie breaker. + FuncUnitSorter FUS = + FuncUnitSorter(MF.getSubtarget().getInstrItineraryData()); + for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), + E = MBB->getFirstTerminator(); + I != E; ++I) + FUS.calcCriticalResources(*I); + PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter> + FuncUnitOrder(FUS); + + for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), + E = MBB->getFirstTerminator(); + I != E; ++I) + FuncUnitOrder.push(&*I); + + while (!FuncUnitOrder.empty()) { + MachineInstr *MI = FuncUnitOrder.top(); + FuncUnitOrder.pop(); + if (TII->isZeroCost(MI->getOpcode())) + continue; + // Attempt to reserve the instruction in an existing DFA. At least one + // DFA is needed for each cycle. + unsigned NumCycles = getSUnit(MI)->Latency; + unsigned ReservedCycles = 0; + SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin(); + SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end(); + for (unsigned C = 0; C < NumCycles; ++C) + while (RI != RE) { + if ((*RI++)->canReserveResources(*MI)) { + ++ReservedCycles; + break; + } + } + // Start reserving resources using existing DFAs. + for (unsigned C = 0; C < ReservedCycles; ++C) { + --RI; + (*RI)->reserveResources(*MI); + } + // Add new DFAs, if needed, to reserve resources. 
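// Editorial illustration (not from the original source): if, say, three
// instructions in the loop body can each issue only on a single multiply unit,
// they cannot share a cycle, so three DFAs end up being allocated below and
// the loop's ResMII is at least 3.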
+ for (unsigned C = ReservedCycles; C < NumCycles; ++C) { + DFAPacketizer *NewResource = + TII->CreateTargetScheduleState(MF.getSubtarget()); + assert(NewResource->canReserveResources(*MI) && "Reserve error."); + NewResource->reserveResources(*MI); + Resources.push_back(NewResource); + } + } + int Resmii = Resources.size(); + // Delete the memory for each of the DFAs that were created earlier. + for (DFAPacketizer *RI : Resources) { + DFAPacketizer *D = RI; + delete D; + } + Resources.clear(); + return Resmii; +} + +/// Calculate the recurrence-constrainted minimum initiation interval. +/// Iterate over each circuit. Compute the delay(c) and distance(c) +/// for each circuit. The II needs to satisfy the inequality +/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest +/// II that satistifies the inequality, and the RecMII is the maximum +/// of those values. +unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { + unsigned RecMII = 0; + + for (NodeSet &Nodes : NodeSets) { + if (Nodes.size() == 0) + continue; + + unsigned Delay = Nodes.size() - 1; + unsigned Distance = 1; + + // ii = ceil(delay / distance) + unsigned CurMII = (Delay + Distance - 1) / Distance; + Nodes.setRecMII(CurMII); + if (CurMII > RecMII) + RecMII = CurMII; + } + + return RecMII; +} + +/// Swap all the anti dependences in the DAG. That means it is no longer a DAG, +/// but we do this to find the circuits, and then change them back. +static void swapAntiDependences(std::vector<SUnit> &SUnits) { + SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end(); + IP != EP; ++IP) { + if (IP->getKind() != SDep::Anti) + continue; + DepsAdded.push_back(std::make_pair(SU, *IP)); + } + } + for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(), + E = DepsAdded.end(); + I != E; ++I) { + // Remove this anti dependency and add one in the reverse direction. + SUnit *SU = I->first; + SDep &D = I->second; + SUnit *TargetSU = D.getSUnit(); + unsigned Reg = D.getReg(); + unsigned Lat = D.getLatency(); + SU->removePred(D); + SDep Dep(SU, SDep::Anti, Reg); + Dep.setLatency(Lat); + TargetSU->addPred(Dep); + } +} + +/// Create the adjacency structure of the nodes in the graph. +void SwingSchedulerDAG::Circuits::createAdjacencyStructure( + SwingSchedulerDAG *DAG) { + BitVector Added(SUnits.size()); + for (int i = 0, e = SUnits.size(); i != e; ++i) { + Added.reset(); + // Add any successor to the adjacency matrix and exclude duplicates. + for (auto &SI : SUnits[i].Succs) { + // Do not process a boundary node and a back-edge is processed only + // if it goes to a Phi. + if (SI.getSUnit()->isBoundaryNode() || + (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI())) + continue; + int N = SI.getSUnit()->NodeNum; + if (!Added.test(N)) { + AdjK[i].push_back(N); + Added.set(N); + } + } + // A chain edge between a store and a load is treated as a back-edge in the + // adjacency matrix. + for (auto &PI : SUnits[i].Preds) { + if (!SUnits[i].getInstr()->mayStore() || + !DAG->isLoopCarriedOrder(&SUnits[i], PI, false)) + continue; + if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) { + int N = PI.getSUnit()->NodeNum; + if (!Added.test(N)) { + AdjK[i].push_back(N); + Added.set(N); + } + } + } + } +} + +/// Identify an elementary circuit in the dependence graph starting at the +/// specified node. 
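A worked example for calculateRecMII above and the circuits found by the routine below (the circuit is invented for illustration): if circuit finding returns the elementary circuit {PHI, add, mul} for a value that wraps around the loop once, this implementation approximates
//   Delay    = Nodes.size() - 1 = 2
//   Distance = 1
//   CurMII   = ceil(Delay / Distance) = (2 + 1 - 1) / 1 = 2
// so this circuit alone forces II >= 2, i.e. delay(c) - II * distance(c) <= 0.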
+bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets, + bool HasBackedge) { + SUnit *SV = &SUnits[V]; + bool F = false; + Stack.insert(SV); + Blocked.set(V); + + for (auto W : AdjK[V]) { + if (NumPaths > MaxPaths) + break; + if (W < S) + continue; + if (W == S) { + if (!HasBackedge) + NodeSets.push_back(NodeSet(Stack.begin(), Stack.end())); + F = true; + ++NumPaths; + break; + } else if (!Blocked.test(W)) { + if (circuit(W, S, NodeSets, W < V ? true : HasBackedge)) + F = true; + } + } + + if (F) + unblock(V); + else { + for (auto W : AdjK[V]) { + if (W < S) + continue; + if (B[W].count(SV) == 0) + B[W].insert(SV); + } + } + Stack.pop_back(); + return F; +} + +/// Unblock a node in the circuit finding algorithm. +void SwingSchedulerDAG::Circuits::unblock(int U) { + Blocked.reset(U); + SmallPtrSet<SUnit *, 4> &BU = B[U]; + while (!BU.empty()) { + SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin(); + assert(SI != BU.end() && "Invalid B set."); + SUnit *W = *SI; + BU.erase(W); + if (Blocked.test(W->NodeNum)) + unblock(W->NodeNum); + } +} + +/// Identify all the elementary circuits in the dependence graph using +/// Johnson's circuit algorithm. +void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { + // Swap all the anti dependences in the DAG. That means it is no longer a DAG, + // but we do this to find the circuits, and then change them back. + swapAntiDependences(SUnits); + + Circuits Cir(SUnits); + // Create the adjacency structure. + Cir.createAdjacencyStructure(this); + for (int i = 0, e = SUnits.size(); i != e; ++i) { + Cir.reset(); + Cir.circuit(i, i, NodeSets); + } + + // Change the dependences back so that we've created a DAG again. + swapAntiDependences(SUnits); +} + +/// Return true for DAG nodes that we ignore when computing the cost functions. +/// We ignore the back-edge recurrence in order to avoid unbounded recurison +/// in the calculation of the ASAP, ALAP, etc functions. +static bool ignoreDependence(const SDep &D, bool isPred) { + if (D.isArtificial()) + return true; + return D.getKind() == SDep::Anti && isPred; +} + +/// Compute several functions need to order the nodes for scheduling. +/// ASAP - Earliest time to schedule a node. +/// ALAP - Latest time to schedule a node. +/// MOV - Mobility function, difference between ALAP and ASAP. +/// D - Depth of each node. +/// H - Height of each node. +void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { + + ScheduleInfo.resize(SUnits.size()); + + DEBUG({ + for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), + E = Topo.end(); + I != E; ++I) { + SUnit *SU = &SUnits[*I]; + SU->dump(this); + } + }); + + int maxASAP = 0; + // Compute ASAP. + for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), + E = Topo.end(); + I != E; ++I) { + int asap = 0; + SUnit *SU = &SUnits[*I]; + for (SUnit::const_pred_iterator IP = SU->Preds.begin(), + EP = SU->Preds.end(); + IP != EP; ++IP) { + if (ignoreDependence(*IP, true)) + continue; + SUnit *pred = IP->getSUnit(); + asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) - + getDistance(pred, SU, *IP) * MII)); + } + maxASAP = std::max(maxASAP, asap); + ScheduleInfo[*I].ASAP = asap; + } + + // Compute ALAP and MOV. 
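// Editorial note (not in the original source): in this pass's terms the loop
// above computed, for each node v over its non-ignored incoming edges,
//   ASAP(v) = max over predecessors u of
//             ASAP(u) + latency(u, v) - distance(u, v) * MII
// and the loop below computes
//   ALAP(v) = min over successors w of
//             ALAP(w) - latency(v, w) + distance(v, w) * MII
// with the mobility MOV(v) = ALAP(v) - ASAP(v) used later as scheduling slack.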
+ for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(), + E = Topo.rend(); + I != E; ++I) { + int alap = maxASAP; + SUnit *SU = &SUnits[*I]; + for (SUnit::const_succ_iterator IS = SU->Succs.begin(), + ES = SU->Succs.end(); + IS != ES; ++IS) { + if (ignoreDependence(*IS, true)) + continue; + SUnit *succ = IS->getSUnit(); + alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) + + getDistance(SU, succ, *IS) * MII)); + } + + ScheduleInfo[*I].ALAP = alap; + } + + // After computing the node functions, compute the summary for each node set. + for (NodeSet &I : NodeSets) + I.computeNodeSetInfo(this); + + DEBUG({ + for (unsigned i = 0; i < SUnits.size(); i++) { + dbgs() << "\tNode " << i << ":\n"; + dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n"; + dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n"; + dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n"; + dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n"; + dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n"; + } + }); +} + +/// Compute the Pred_L(O) set, as defined in the paper. The set is defined +/// as the predecessors of the elements of NodeOrder that are not also in +/// NodeOrder. +static bool pred_L(SetVector<SUnit *> &NodeOrder, + SmallSetVector<SUnit *, 8> &Preds, + const NodeSet *S = nullptr) { + Preds.clear(); + for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); + I != E; ++I) { + for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end(); + PI != PE; ++PI) { + if (S && S->count(PI->getSUnit()) == 0) + continue; + if (ignoreDependence(*PI, true)) + continue; + if (NodeOrder.count(PI->getSUnit()) == 0) + Preds.insert(PI->getSUnit()); + } + // Back-edges are predecessors with an anti-dependence. + for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(), + ES = (*I)->Succs.end(); + IS != ES; ++IS) { + if (IS->getKind() != SDep::Anti) + continue; + if (S && S->count(IS->getSUnit()) == 0) + continue; + if (NodeOrder.count(IS->getSUnit()) == 0) + Preds.insert(IS->getSUnit()); + } + } + return Preds.size() > 0; +} + +/// Compute the Succ_L(O) set, as defined in the paper. The set is defined +/// as the successors of the elements of NodeOrder that are not also in +/// NodeOrder. +static bool succ_L(SetVector<SUnit *> &NodeOrder, + SmallSetVector<SUnit *, 8> &Succs, + const NodeSet *S = nullptr) { + Succs.clear(); + for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); + I != E; ++I) { + for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end(); + SI != SE; ++SI) { + if (S && S->count(SI->getSUnit()) == 0) + continue; + if (ignoreDependence(*SI, false)) + continue; + if (NodeOrder.count(SI->getSUnit()) == 0) + Succs.insert(SI->getSUnit()); + } + for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(), + PE = (*I)->Preds.end(); + PI != PE; ++PI) { + if (PI->getKind() != SDep::Anti) + continue; + if (S && S->count(PI->getSUnit()) == 0) + continue; + if (NodeOrder.count(PI->getSUnit()) == 0) + Succs.insert(PI->getSUnit()); + } + } + return Succs.size() > 0; +} + +/// Return true if there is a path from the specified node to any of the nodes +/// in DestNodes. Keep track and return the nodes in any path. 
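// Standalone sketch of the path search described above (plain C++,
// illustrative names; it only walks successor edges, whereas the code below
// also follows anti-dependence predecessors so that back-edges are traversed):
#include <set>
#include <vector>

static bool findPath(int Cur, const std::vector<std::vector<int>> &Succs,
                     const std::set<int> &DestNodes,
                     const std::set<int> &Exclude, std::set<int> &Visited,
                     std::set<int> &Path) {
  if (Exclude.count(Cur))
    return false;
  if (DestNodes.count(Cur))
    return true;
  if (!Visited.insert(Cur).second)
    return Path.count(Cur) != 0; // already explored; reuse the earlier answer
  bool Found = false;
  for (int Next : Succs[Cur])
    Found |= findPath(Next, Succs, DestNodes, Exclude, Visited, Path);
  if (Found)
    Path.insert(Cur); // remember every node that lies on a found path
  return Found;
}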
+static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, + SetVector<SUnit *> &DestNodes, + SetVector<SUnit *> &Exclude, + SmallPtrSet<SUnit *, 8> &Visited) { + if (Cur->isBoundaryNode()) + return false; + if (Exclude.count(Cur) != 0) + return false; + if (DestNodes.count(Cur) != 0) + return true; + if (!Visited.insert(Cur).second) + return Path.count(Cur) != 0; + bool FoundPath = false; + for (auto &SI : Cur->Succs) + FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); + for (auto &PI : Cur->Preds) + if (PI.getKind() == SDep::Anti) + FoundPath |= + computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited); + if (FoundPath) + Path.insert(Cur); + return FoundPath; +} + +/// Return true if Set1 is a subset of Set2. +template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) { + for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I) + if (Set2.count(*I) == 0) + return false; + return true; +} + +/// Compute the live-out registers for the instructions in a node-set. +/// The live-out registers are those that are defined in the node-set, +/// but not used. Except for use operands of Phis. +static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, + NodeSet &NS) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SmallVector<RegisterMaskPair, 8> LiveOutRegs; + SmallSet<unsigned, 4> Uses; + for (SUnit *SU : NS) { + const MachineInstr *MI = SU->getInstr(); + if (MI->isPHI()) + continue; + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Uses.insert(Reg); + else if (MRI.isAllocatable(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + Uses.insert(*Units); + } + } + for (SUnit *SU : NS) + for (const MachineOperand &MO : SU->getInstr()->operands()) + if (MO.isReg() && MO.isDef() && !MO.isDead()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Uses.count(Reg)) + LiveOutRegs.push_back(RegisterMaskPair(Reg, + LaneBitmask::getNone())); + } else if (MRI.isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (!Uses.count(*Units)) + LiveOutRegs.push_back(RegisterMaskPair(*Units, + LaneBitmask::getNone())); + } + } + RPTracker.addLiveRegs(LiveOutRegs); +} + +/// A heuristic to filter nodes in recurrent node-sets if the register +/// pressure of a set is too high. +void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { + for (auto &NS : NodeSets) { + // Skip small node-sets since they won't cause register pressure problems. + if (NS.size() <= 2) + continue; + IntervalPressure RecRegPressure; + RegPressureTracker RecRPTracker(RecRegPressure); + RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true); + computeLiveOuts(MF, RecRPTracker, NS); + RecRPTracker.closeBottom(); + + std::vector<SUnit *> SUnits(NS.begin(), NS.end()); + std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) { + return A->NodeNum > B->NodeNum; + }); + + for (auto &SU : SUnits) { + // Since we're computing the register pressure for a subset of the + // instructions in a block, we need to set the tracker for each + // instruction in the node-set. The tracker is set to the instruction + // just after the one we're interested in. 
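// The pressure walk below starts from the live-outs computed by
// computeLiveOuts above. As a rough standalone sketch of that rule (plain C++,
// illustrative types): a register defined in the node-set is live-out unless
// some non-Phi instruction in the set uses it.
#include <set>
#include <vector>

struct NodeInsn { std::set<unsigned> Defs, Uses; bool IsPhi; };

static std::set<unsigned> liveOutRegs(const std::vector<NodeInsn> &NodeSet) {
  std::set<unsigned> Uses, LiveOut;
  for (const NodeInsn &I : NodeSet)
    if (!I.IsPhi) // use operands of Phis are ignored, as noted above
      Uses.insert(I.Uses.begin(), I.Uses.end());
  for (const NodeInsn &I : NodeSet)
    for (unsigned D : I.Defs)
      if (!Uses.count(D))
        LiveOut.insert(D);
  return LiveOut;
}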
+ MachineBasicBlock::const_iterator CurInstI = SU->getInstr(); + RecRPTracker.setPos(std::next(CurInstI)); + + RegPressureDelta RPDelta; + ArrayRef<PressureChange> CriticalPSets; + RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta, + CriticalPSets, + RecRegPressure.MaxSetPressure); + if (RPDelta.Excess.isValid()) { + DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " + << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) + << ":" << RPDelta.Excess.getUnitInc()); + NS.setExceedPressure(SU); + break; + } + RecRPTracker.recede(); + } + } +} + +/// A heuristic to colocate node sets that have the same set of +/// successors. +void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { + unsigned Colocate = 0; + for (int i = 0, e = NodeSets.size(); i < e; ++i) { + NodeSet &N1 = NodeSets[i]; + SmallSetVector<SUnit *, 8> S1; + if (N1.empty() || !succ_L(N1, S1)) + continue; + for (int j = i + 1; j < e; ++j) { + NodeSet &N2 = NodeSets[j]; + if (N1.compareRecMII(N2) != 0) + continue; + SmallSetVector<SUnit *, 8> S2; + if (N2.empty() || !succ_L(N2, S2)) + continue; + if (isSubset(S1, S2) && S1.size() == S2.size()) { + N1.setColocate(++Colocate); + N2.setColocate(Colocate); + break; + } + } + } +} + +/// Check if the existing node-sets are profitable. If not, then ignore the +/// recurrent node-sets, and attempt to schedule all nodes together. This is +/// a heuristic. If the MII is large and there is a non-recurrent node with +/// a large depth compared to the MII, then it's best to try and schedule +/// all instruction together instead of starting with the recurrent node-sets. +void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { + // Look for loops with a large MII. + if (MII <= 20) + return; + // Check if the node-set contains only a simple add recurrence. + for (auto &NS : NodeSets) + if (NS.size() > 2) + return; + // If the depth of any instruction is significantly larger than the MII, then + // ignore the recurrent node-sets and treat all instructions equally. + for (auto &SU : SUnits) + if (SU.getDepth() > MII * 1.5) { + NodeSets.clear(); + DEBUG(dbgs() << "Clear recurrence node-sets\n"); + return; + } +} + +/// Add the nodes that do not belong to a recurrence set into groups +/// based upon connected componenets. +void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { + SetVector<SUnit *> NodesAdded; + SmallPtrSet<SUnit *, 8> Visited; + // Add the nodes that are on a path between the previous node sets and + // the current node set. + for (NodeSet &I : NodeSets) { + SmallSetVector<SUnit *, 8> N; + // Add the nodes from the current node set to the previous node set. + if (succ_L(I, N)) { + SetVector<SUnit *> Path; + for (SUnit *NI : N) { + Visited.clear(); + computePath(NI, Path, NodesAdded, I, Visited); + } + if (Path.size() > 0) + I.insert(Path.begin(), Path.end()); + } + // Add the nodes from the previous node set to the current node set. + N.clear(); + if (succ_L(NodesAdded, N)) { + SetVector<SUnit *> Path; + for (SUnit *NI : N) { + Visited.clear(); + computePath(NI, Path, I, NodesAdded, Visited); + } + if (Path.size() > 0) + I.insert(Path.begin(), Path.end()); + } + NodesAdded.insert(I.begin(), I.end()); + } + + // Create a new node set with the connected nodes of any successor of a node + // in a recurrent set. 
+ NodeSet NewSet; + SmallSetVector<SUnit *, 8> N; + if (succ_L(NodesAdded, N)) + for (SUnit *I : N) + addConnectedNodes(I, NewSet, NodesAdded); + if (NewSet.size() > 0) + NodeSets.push_back(NewSet); + + // Create a new node set with the connected nodes of any predecessor of a node + // in a recurrent set. + NewSet.clear(); + if (pred_L(NodesAdded, N)) + for (SUnit *I : N) + addConnectedNodes(I, NewSet, NodesAdded); + if (NewSet.size() > 0) + NodeSets.push_back(NewSet); + + // Create new nodes sets with the connected nodes any any remaining node that + // has no predecessor. + for (unsigned i = 0; i < SUnits.size(); ++i) { + SUnit *SU = &SUnits[i]; + if (NodesAdded.count(SU) == 0) { + NewSet.clear(); + addConnectedNodes(SU, NewSet, NodesAdded); + if (NewSet.size() > 0) + NodeSets.push_back(NewSet); + } + } +} + +/// Add the node to the set, and add all is its connected nodes to the set. +void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, + SetVector<SUnit *> &NodesAdded) { + NewSet.insert(SU); + NodesAdded.insert(SU); + for (auto &SI : SU->Succs) { + SUnit *Successor = SI.getSUnit(); + if (!SI.isArtificial() && NodesAdded.count(Successor) == 0) + addConnectedNodes(Successor, NewSet, NodesAdded); + } + for (auto &PI : SU->Preds) { + SUnit *Predecessor = PI.getSUnit(); + if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0) + addConnectedNodes(Predecessor, NewSet, NodesAdded); + } +} + +/// Return true if Set1 contains elements in Set2. The elements in common +/// are returned in a different container. +static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2, + SmallSetVector<SUnit *, 8> &Result) { + Result.clear(); + for (unsigned i = 0, e = Set1.size(); i != e; ++i) { + SUnit *SU = Set1[i]; + if (Set2.count(SU) != 0) + Result.insert(SU); + } + return !Result.empty(); +} + +/// Merge the recurrence node sets that have the same initial node. +void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) { + for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E; + ++I) { + NodeSet &NI = *I; + for (NodeSetType::iterator J = I + 1; J != E;) { + NodeSet &NJ = *J; + if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) { + if (NJ.compareRecMII(NI) > 0) + NI.setRecMII(NJ.getRecMII()); + for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI; + ++NII) + I->insert(*NII); + NodeSets.erase(J); + E = NodeSets.end(); + } else { + ++J; + } + } + } +} + +/// Remove nodes that have been scheduled in previous NodeSets. +void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) { + for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E; + ++I) + for (NodeSetType::iterator J = I + 1; J != E;) { + J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); }); + + if (J->size() == 0) { + NodeSets.erase(J); + E = NodeSets.end(); + } else { + ++J; + } + } +} + +/// Return true if Inst1 defines a value that is used in Inst2. +static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) { + for (auto &SI : Inst1->Succs) + if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data) + return true; + return false; +} + +/// Compute an ordered list of the dependence graph nodes, which +/// indicates the order that the nodes will be scheduled. This is a +/// two-level algorithm. First, a partial order is created, which +/// consists of a list of sets ordered from highest to lowest priority. 
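// Before the ordering below runs, every remaining node has been pulled into
// some node-set by the grouping above. That grouping is essentially this flood
// fill (plain C++ sketch, illustrative names; the real code also skips
// artificial edges):
#include <set>
#include <vector>

static std::set<int> connectedGroup(int Seed,
                                    const std::vector<std::vector<int>> &Adj,
                                    std::set<int> &Claimed) {
  std::set<int> Group;
  std::vector<int> Work{Seed};
  while (!Work.empty()) {
    int V = Work.back();
    Work.pop_back();
    if (!Claimed.insert(V).second)
      continue; // already assigned to an earlier node-set
    Group.insert(V);
    for (int W : Adj[V]) // Adj holds both successor and predecessor neighbors
      Work.push_back(W);
  }
  return Group;
}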
+void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { + SmallSetVector<SUnit *, 8> R; + NodeOrder.clear(); + + for (auto &Nodes : NodeSets) { + DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); + OrderKind Order; + SmallSetVector<SUnit *, 8> N; + if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) { + R.insert(N.begin(), N.end()); + Order = BottomUp; + DEBUG(dbgs() << " Bottom up (preds) "); + } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) { + R.insert(N.begin(), N.end()); + Order = TopDown; + DEBUG(dbgs() << " Top down (succs) "); + } else if (isIntersect(N, Nodes, R)) { + // If some of the successors are in the existing node-set, then use the + // top-down ordering. + Order = TopDown; + DEBUG(dbgs() << " Top down (intersect) "); + } else if (NodeSets.size() == 1) { + for (auto &N : Nodes) + if (N->Succs.size() == 0) + R.insert(N); + Order = BottomUp; + DEBUG(dbgs() << " Bottom up (all) "); + } else { + // Find the node with the highest ASAP. + SUnit *maxASAP = nullptr; + for (SUnit *SU : Nodes) { + if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP)) + maxASAP = SU; + } + R.insert(maxASAP); + Order = BottomUp; + DEBUG(dbgs() << " Bottom up (default) "); + } + + while (!R.empty()) { + if (Order == TopDown) { + // Choose the node with the maximum height. If more than one, choose + // the node with the lowest MOV. If still more than one, check if there + // is a dependence between the instructions. + while (!R.empty()) { + SUnit *maxHeight = nullptr; + for (SUnit *I : R) { + if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight)) + maxHeight = I; + else if (getHeight(I) == getHeight(maxHeight) && + getMOV(I) < getMOV(maxHeight) && + !hasDataDependence(maxHeight, I)) + maxHeight = I; + else if (hasDataDependence(I, maxHeight)) + maxHeight = I; + } + NodeOrder.insert(maxHeight); + DEBUG(dbgs() << maxHeight->NodeNum << " "); + R.remove(maxHeight); + for (const auto &I : maxHeight->Succs) { + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + if (ignoreDependence(I, false)) + continue; + R.insert(I.getSUnit()); + } + // Back-edges are predecessors with an anti-dependence. + for (const auto &I : maxHeight->Preds) { + if (I.getKind() != SDep::Anti) + continue; + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + R.insert(I.getSUnit()); + } + } + Order = BottomUp; + DEBUG(dbgs() << "\n Switching order to bottom up "); + SmallSetVector<SUnit *, 8> N; + if (pred_L(NodeOrder, N, &Nodes)) + R.insert(N.begin(), N.end()); + } else { + // Choose the node with the maximum depth. If more than one, choose + // the node with the lowest MOV. If there is still more than one, check + // for a dependence between the instructions. 
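// Condensed sketch of the selection rule spelled out above (plain C++,
// illustrative names): pick the candidate with the greatest height (top-down)
// or depth (bottom-up); on a tie prefer the smaller mobility MOV = ALAP - ASAP.
// The loops below additionally break remaining ties using data dependences.
#include <cstddef>
#include <vector>

struct Candidate { int Id; int HeightOrDepth; int MOV; };

static int pickNext(const std::vector<Candidate> &R) {
  std::size_t Best = 0; // assumes R is non-empty
  for (std::size_t I = 1, E = R.size(); I != E; ++I)
    if (R[I].HeightOrDepth > R[Best].HeightOrDepth ||
        (R[I].HeightOrDepth == R[Best].HeightOrDepth && R[I].MOV < R[Best].MOV))
      Best = I;
  return R[Best].Id;
}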
+ while (!R.empty()) { + SUnit *maxDepth = nullptr; + for (SUnit *I : R) { + if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth)) + maxDepth = I; + else if (getDepth(I) == getDepth(maxDepth) && + getMOV(I) < getMOV(maxDepth) && + !hasDataDependence(I, maxDepth)) + maxDepth = I; + else if (hasDataDependence(maxDepth, I)) + maxDepth = I; + } + NodeOrder.insert(maxDepth); + DEBUG(dbgs() << maxDepth->NodeNum << " "); + R.remove(maxDepth); + if (Nodes.isExceedSU(maxDepth)) { + Order = TopDown; + R.clear(); + R.insert(Nodes.getNode(0)); + break; + } + for (const auto &I : maxDepth->Preds) { + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + if (I.getKind() == SDep::Anti) + continue; + R.insert(I.getSUnit()); + } + // Back-edges are predecessors with an anti-dependence. + for (const auto &I : maxDepth->Succs) { + if (I.getKind() != SDep::Anti) + continue; + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + R.insert(I.getSUnit()); + } + } + Order = TopDown; + DEBUG(dbgs() << "\n Switching order to top down "); + SmallSetVector<SUnit *, 8> N; + if (succ_L(NodeOrder, N, &Nodes)) + R.insert(N.begin(), N.end()); + } + } + DEBUG(dbgs() << "\nDone with Nodeset\n"); + } + + DEBUG({ + dbgs() << "Node order: "; + for (SUnit *I : NodeOrder) + dbgs() << " " << I->NodeNum << " "; + dbgs() << "\n"; + }); +} + +/// Process the nodes in the computed order and create the pipelined schedule +/// of the instructions, if possible. Return true if a schedule is found. +bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { + + if (NodeOrder.size() == 0) + return false; + + bool scheduleFound = false; + // Keep increasing II until a valid schedule is found. + for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) { + Schedule.reset(); + Schedule.setInitiationInterval(II); + DEBUG(dbgs() << "Try to schedule with " << II << "\n"); + + SetVector<SUnit *>::iterator NI = NodeOrder.begin(); + SetVector<SUnit *>::iterator NE = NodeOrder.end(); + do { + SUnit *SU = *NI; + + // Compute the schedule time for the instruction, which is based + // upon the scheduled time for any predecessors/successors. + int EarlyStart = INT_MIN; + int LateStart = INT_MAX; + // These values are set when the size of the schedule window is limited + // due to chain dependences. + int SchedEnd = INT_MAX; + int SchedStart = INT_MIN; + Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart, + II, this); + DEBUG({ + dbgs() << "Inst (" << SU->NodeNum << ") "; + SU->getInstr()->dump(); + dbgs() << "\n"; + }); + DEBUG({ + dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart + << " me: " << SchedEnd << " ms: " << SchedStart << "\n"; + }); + + if (EarlyStart > LateStart || SchedEnd < EarlyStart || + SchedStart > LateStart) + scheduleFound = false; + else if (EarlyStart != INT_MIN && LateStart == INT_MAX) { + SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1); + scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II); + } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) { + SchedStart = std::max(SchedStart, LateStart - (int)II + 1); + scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II); + } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) { + SchedEnd = + std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1)); + // When scheduling a Phi it is better to start at the late cycle and go + // backwards. 
The default order may insert the Phi too far away from + // its first dependence. + if (SU->getInstr()->isPHI()) + scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II); + else + scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II); + } else { + int FirstCycle = Schedule.getFirstCycle(); + scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU), + FirstCycle + getASAP(SU) + II - 1, II); + } + // Even if we find a schedule, make sure the schedule doesn't exceed the + // allowable number of stages. We keep trying if this happens. + if (scheduleFound) + if (SwpMaxStages > -1 && + Schedule.getMaxStageCount() > (unsigned)SwpMaxStages) + scheduleFound = false; + + DEBUG({ + if (!scheduleFound) + dbgs() << "\tCan't schedule\n"; + }); + } while (++NI != NE && scheduleFound); + + // If a schedule is found, check if it is a valid schedule too. + if (scheduleFound) + scheduleFound = Schedule.isValidSchedule(this); + } + + DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n"); + + if (scheduleFound) + Schedule.finalizeSchedule(this); + else + Schedule.reset(); + + return scheduleFound && Schedule.getMaxStageCount() > 0; +} + +/// Given a schedule for the loop, generate a new version of the loop, +/// and replace the old version. This function generates a prolog +/// that contains the initial iterations in the pipeline, and kernel +/// loop, and the epilogue that contains the code for the final +/// iterations. +void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { + // Create a new basic block for the kernel and add it to the CFG. + MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + + unsigned MaxStageCount = Schedule.getMaxStageCount(); + + // Remember the registers that are used in different stages. The index is + // the iteration, or stage, that the instruction is scheduled in. This is + // a map between register names in the orignal block and the names created + // in each stage of the pipelined loop. + ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; + + SmallVector<MachineBasicBlock *, 4> PrologBBs; + // Generate the prolog instructions that set up the pipeline. + generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs); + MF.insert(BB->getIterator(), KernelBB); + + // Rearrange the instructions to generate the new, pipelined loop, + // and update register names as needed. + for (int Cycle = Schedule.getFirstCycle(), + LastCycle = Schedule.getFinalCycle(); + Cycle <= LastCycle; ++Cycle) { + std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle); + // This inner loop schedules each instruction in the cycle. + for (SUnit *CI : CycleInstrs) { + if (CI->getInstr()->isPHI()) + continue; + unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr())); + MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum); + updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = CI->getInstr(); + } + } + + // Copy any terminator instructions to the new kernel, and update + // names as needed. 
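// For orientation, a rough worked example of the block structure this function
// produces (illustrative; assumes 3 stages S0..S2, i.e. MaxStageCount == 2,
// and a trip count of 5, iterations 0..4):
//   prolog 0 : S0(iter 0)
//   prolog 1 : S1(iter 0), S0(iter 1)
//   kernel   : S2(iter n-2), S1(iter n-1), S0(iter n)   for n = 2, 3, 4
//   epilog 1 : S2(iter 3)
//   epilog 2 : S1(iter 4), S2(iter 4)
// Each original instruction is cloned once into every block whose stage set
// contains its stage (in cycle order within the kernel), and VRMap renames its
// registers per stage.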
+ for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), + E = BB->instr_end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = &*I; + } + + KernelBB->transferSuccessors(BB); + KernelBB->replaceSuccessor(BB, KernelBB); + + generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, + VRMap, InstrMap, MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap, + InstrMap, MaxStageCount, MaxStageCount, false); + + DEBUG(dbgs() << "New block\n"; KernelBB->dump();); + + SmallVector<MachineBasicBlock *, 4> EpilogBBs; + // Generate the epilog instructions to complete the pipeline. + generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs, + PrologBBs); + + // We need this step because the register allocation doesn't handle some + // situations well, so we insert copies to help out. + splitLifetimes(KernelBB, EpilogBBs, Schedule); + + // Remove dead instructions due to loop induction variables. + removeDeadInstructions(KernelBB, EpilogBBs); + + // Add branches between prolog and epilog blocks. + addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); + + // Remove the original loop since it's no longer referenced. + BB->clear(); + BB->eraseFromParent(); + + delete[] VRMap; +} + +/// Generate the pipeline prolog code. +void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &PrologBBs) { + MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); + assert(PreheaderBB != NULL && + "Need to add code to handle loops w/o preheader"); + MachineBasicBlock *PredBB = PreheaderBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which will be generated in the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + for (unsigned i = 0; i < LastStage; ++i) { + // Create and insert the prolog basic block prior to the original loop + // basic block. The original loop is removed later. + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + PrologBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + NewBB->transferSuccessors(PredBB); + PredBB->addSuccessor(NewBB); + PredBB = NewBB; + + // Generate instructions for each appropriate stage. Process instructions + // in original program order. + for (int StageNum = i; StageNum >= 0; --StageNum) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstTerminator(); + BBI != BBE; ++BBI) { + if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) { + if (BBI->isPHI()) + continue; + MachineInstr *NewMI = + cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule); + updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule, + VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = &*BBI; + } + } + } + rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); + DEBUG({ + dbgs() << "prolog:\n"; + NewBB->dump(); + }); + } + + PredBB->replaceSuccessor(BB, KernelBB); + + // Check if we need to remove the branch from the preheader to the original + // loop, and replace it with a branch to the new loop. 
+ unsigned numBranches = TII->removeBranch(*PreheaderBB); + if (numBranches) { + SmallVector<MachineOperand, 0> Cond; + TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc()); + } +} + +/// Generate the pipeline epilog code. The epilog code finishes the iterations +/// that were started in either the prolog or the kernel. We create a basic +/// block for each stage that needs to complete. +void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { + // We need to change the branch from the kernel to the first epilog block, so + // this call to analyze branch uses the kernel rather than the original BB. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); + assert(!checkBranch && "generateEpilog must be able to analyze the branch"); + if (checkBranch) + return; + + MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); + if (*LoopExitI == KernelBB) + ++LoopExitI; + assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); + MachineBasicBlock *LoopExitBB = *LoopExitI; + + MachineBasicBlock *PredBB = KernelBB; + MachineBasicBlock *EpilogStart = LoopExitBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which was generated for the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + int EpilogStage = LastStage + 1; + for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); + EpilogBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + + PredBB->replaceSuccessor(LoopExitBB, NewBB); + NewBB->addSuccessor(LoopExitBB); + + if (EpilogStart == LoopExitBB) + EpilogStart = NewBB; + + // Add instructions to the epilog depending on the current block. + // Process instructions in original program order. + for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { + for (auto &BBI : *BB) { + if (BBI.isPHI()) + continue; + MachineInstr *In = &BBI; + if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { + MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0); + updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = In; + } + } + } + generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, + VRMap, InstrMap, LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap, + InstrMap, LastStage, EpilogStage, i == 1); + PredBB = NewBB; + + DEBUG({ + dbgs() << "epilog:\n"; + NewBB->dump(); + }); + } + + // Fix any Phi nodes in the loop exit block. + for (MachineInstr &MI : *LoopExitBB) { + if (!MI.isPHI()) + break; + for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == BB) + MO.setMBB(PredBB); + } + } + + // Create a branch to the new epilog from the kernel. + // Remove the original branch and add a new branch to the epilog. + TII->removeBranch(*KernelBB); + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + // Add a branch to the loop exit. 
+ if (EpilogBBs.size() > 0) { + MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); + SmallVector<MachineOperand, 4> Cond1; + TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); + } +} + +/// Replace all uses of FromReg that appear outside the specified +/// basic block with ToReg. +static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, + MachineBasicBlock *MBB, + MachineRegisterInfo &MRI, + LiveIntervals &LIS) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), + E = MRI.use_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) + O.setReg(ToReg); + } + if (!LIS.hasInterval(ToReg)) + LIS.createEmptyInterval(ToReg); +} + +/// Return true if the register has a use that occurs outside the +/// specified loop. +static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, + MachineRegisterInfo &MRI) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), + E = MRI.use_end(); + I != E; ++I) + if (I->getParent()->getParent() != BB) + return true; + return false; +} + +/// Generate Phis for the specific block in the generated pipelined code. +/// This function looks at the Phis from the original code to guide the +/// creation of new Phis. +void SwingSchedulerDAG::generateExistingPhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { + // Compute the stage number for the inital value of the Phi, which + // comes from the prolog. The prolog to use depends on to which kernel/ + // epilog that we're adding the Phi. + unsigned PrologStage = 0; + unsigned PrevStage = 0; + bool InKernel = (LastStageNum == CurStageNum); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - (CurStageNum - LastStageNum); + PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstNonPHI(); + BBI != BBE; ++BBI) { + unsigned Def = BBI->getOperand(0).getReg(); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*BBI, BB, InitVal, LoopVal); + + unsigned PhiOp1 = 0; + // The Phi value from the loop body typically is defined in the loop, but + // not always. So, we need to check if the value is defined in the loop. + unsigned PhiOp2 = LoopVal; + if (VRMap[LastStageNum].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum][LoopVal]; + + int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); + int LoopValStage = + Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); + unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum); + if (NumStages == 0) { + // We don't need to generate a Phi anymore, but we need to rename any uses + // of the Phi value. + unsigned NewReg = VRMap[PrevStage][LoopVal]; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, + Def, NewReg); + if (VRMap[CurStageNum].count(LoopVal)) + VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; + } + // Adjust the number of Phis needed depending on the number of prologs left, + // and the distance from where the Phi is first scheduled. + unsigned NumPhis = NumStages; + if (!InKernel && (int)PrologStage < LoopValStage) + // The NumPhis is the maximum number of new Phis needed during the steady + // state. 
If the Phi has not been scheduled in current prolog, then we + // need to generate less Phis. + NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1); + // The number of Phis cannot exceed the number of prolog stages. Each + // stage can potentially define two values. + NumPhis = std::min(NumPhis, PrologStage + 2); + + unsigned NewReg = 0; + + unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; + // In the epilog, we may need to look back one stage to get the correct + // Phi name because the epilog and prolog blocks execute the same stage. + // The correct name is from the previous block only when the Phi has + // been completely scheduled prior to the epilog, and Phi value is not + // needed in multiple stages. + int StageDiff = 0; + if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && + NumPhis == 1) + StageDiff = 1; + // Adjust the computations below when the phi and the loop definition + // are scheduled in different stages. + if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiff = StageScheduled - LoopValStage; + for (unsigned np = 0; np < NumPhis; ++np) { + // If the Phi hasn't been scheduled, then use the initial Phi operand + // value. Otherwise, use the scheduled version of the instruction. This + // is a little complicated when a Phi references another Phi. + if (np > PrologStage || StageScheduled >= (int)LastStageNum) + PhiOp1 = InitVal; + // Check if the Phi has already been scheduled in a prolog stage. + else if (PrologStage >= AccessStage + StageDiff + np && + VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) + PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; + // Check if the Phi has already been scheduled, but the loop intruction + // is either another Phi, or doesn't occur in the loop. + else if (PrologStage >= AccessStage + StageDiff + np) { + // If the Phi references another Phi, we need to examine the other + // Phi to get the correct value. + PhiOp1 = LoopVal; + MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); + int Indirects = 1; + while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { + int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1)); + if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) + PhiOp1 = getInitPhiReg(*InstOp1, BB); + else + PhiOp1 = getLoopPhiReg(*InstOp1, BB); + InstOp1 = MRI.getVRegDef(PhiOp1); + int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1)); + int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0); + if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && + VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { + PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; + break; + } + ++Indirects; + } + } else + PhiOp1 = InitVal; + // If this references a generated Phi in the kernel, get the Phi operand + // from the incoming block. + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + + MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); + bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); + // In the epilog, a map lookup is needed to get the value from the kernel, + // or previous epilog block. How is does this depends on if the + // instruction is scheduled in the previous block. 
+ if (!InKernel) { + int StageDiffAdj = 0; + if (LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiffAdj = StageScheduled - LoopValStage; + // Use the loop value defined in the kernel, unless the kernel + // contains the last definition of the Phi. + if (np == 0 && PrevStage == LastStageNum && + (StageScheduled != 0 || LoopValStage != 0) && + VRMap[PrevStage - StageDiffAdj].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; + // Use the value defined by the Phi. We add one because we switch + // from looking at the loop value to the Phi definition. + else if (np > 0 && PrevStage == LastStageNum && + VRMap[PrevStage - np + 1].count(Def)) + PhiOp2 = VRMap[PrevStage - np + 1][Def]; + // Use the loop value defined in the kernel. + else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 && + VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; + // Use the value defined by the Phi, unless we're generating the first + // epilog and the Phi refers to a Phi in a different stage. + else if (VRMap[PrevStage - np].count(Def) && + (!LoopDefIsPhi || PrevStage != LastStageNum)) + PhiOp2 = VRMap[PrevStage - np][Def]; + } + + // Check if we can reuse an existing Phi. This occurs when a Phi + // references another Phi, and the other Phi is scheduled in an + // earlier stage. We can try to reuse an existing Phi up until the last + // stage of the current Phi. + if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) { + int LVNumStages = Schedule.getStagesForPhi(LoopVal); + int StageDiff = (StageScheduled - LoopValStage); + LVNumStages -= StageDiff; + if (LVNumStages > (int)np) { + NewReg = PhiOp2; + unsigned ReuseStage = CurStageNum; + if (Schedule.isLoopCarried(this, *PhiInst)) + ReuseStage -= LVNumStages; + // Check if the Phi to reuse has been generated yet. If not, then + // there is nothing to reuse. + if (VRMap[ReuseStage].count(LoopVal)) { + NewReg = VRMap[ReuseStage][LoopVal]; + + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, Def, NewReg); + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + PhiOp2 = NewReg; + if (VRMap[LastStageNum - np - 1].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; + + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + continue; + } + } else if (InKernel && StageDiff > 0 && + VRMap[CurStageNum - StageDiff - np].count(LoopVal)) + PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; + } + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // We define the Phis after creating the new pipelined code, so + // we need to rename the Phi values in scheduled instructions. + + unsigned PrevReg = 0; + if (InKernel && VRMap[PrevStage - np].count(LoopVal)) + PrevReg = VRMap[PrevStage - np][LoopVal]; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, + Def, NewReg, PrevReg); + // If the Phi has been scheduled, use the new name for rewriting. 
+ if (VRMap[CurStageNum - np].count(Def)) { + unsigned R = VRMap[CurStageNum - np][Def]; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, + R, NewReg); + } + + // Check if we need to rename any uses that occurs after the loop. The + // register to replace depends on whether the Phi is scheduled in the + // epilog. + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + + // In the kernel, a dependent Phi uses the value from this Phi. + if (InKernel) + PhiOp2 = NewReg; + + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + } + + while (NumPhis++ < NumStages) { + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis, + &*BBI, Def, NewReg, 0); + } + + // Check if we need to rename a Phi that has been eliminated due to + // scheduling. + if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) + replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); + } +} + +/// Generate Phis for the specified block in the generated pipelined code. +/// These are new Phis needed because the definition is scheduled after the +/// use in the pipelened sequence. +void SwingSchedulerDAG::generatePhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { + // Compute the stage number that contains the initial Phi value, and + // the Phi from the previous stage. + unsigned PrologStage = 0; + unsigned PrevStage = 0; + unsigned StageDiff = CurStageNum - LastStageNum; + bool InKernel = (StageDiff == 0); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - StageDiff; + PrevStage = LastStageNum + StageDiff - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), + BBE = BB->instr_end(); + BBI != BBE; ++BBI) { + for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BBI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); + assert(StageScheduled != -1 && "Expecting scheduled instruction."); + unsigned Def = MO.getReg(); + unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum); + // An instruction scheduled in stage 0 and is used after the loop + // requires a phi in the epilog for the last definition from either + // the kernel or prolog. + if (!InKernel && NumPhis == 0 && StageScheduled == 0 && + hasUseAfterLoop(Def, BB, MRI)) + NumPhis = 1; + if (!InKernel && (unsigned)StageScheduled > PrologStage) + continue; + + unsigned PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + // The number of Phis can't exceed the number of prolog stages. The + // prolog stage number is zero based. 
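// Minimal sketch of the VRMap bookkeeping used throughout this code (plain
// C++, illustrative types; the real ValueMapTy is an array of DenseMaps): one
// map per generated stage, from a register name in the original loop to the
// name created for that stage's copy. Phi operands are found by looking the
// original name up in the appropriate stage, falling back to the loop-entry
// value when no copy exists yet.
#include <map>
#include <vector>

using StageValueMap = std::map<unsigned, unsigned>; // OrigReg -> NewReg

static unsigned stageValue(const std::vector<StageValueMap> &VRMap,
                           unsigned Stage, unsigned OrigReg, unsigned InitVal) {
  if (Stage < VRMap.size()) {
    auto It = VRMap[Stage].find(OrigReg);
    if (It != VRMap[Stage].end())
      return It->second;
  }
  return InitVal; // no copy generated for this stage yet
}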
+ if (NumPhis > PrologStage + 1 - StageScheduled) + NumPhis = PrologStage + 1 - StageScheduled; + for (unsigned np = 0; np < NumPhis; ++np) { + unsigned PhiOp1 = VRMap[PrologStage][Def]; + if (np <= PrologStage) + PhiOp1 = VRMap[PrologStage - np][Def]; + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) + PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + } + if (!InKernel) + PhiOp2 = VRMap[PrevStage - np][Def]; + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + unsigned NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // Rewrite uses and update the map. The actions depend upon whether + // we generating code for the kernel or epilog blocks. + if (InKernel) { + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, PhiOp1, NewReg); + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, PhiOp2, NewReg); + + PhiOp2 = NewReg; + VRMap[PrevStage - np - 1][Def] = NewReg; + } else { + VRMap[CurStageNum - np][Def] = NewReg; + if (np == NumPhis - 1) + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, Def, NewReg); + } + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + } + } + } +} + +/// Remove instructions that generate values with no uses. +/// Typically, these are induction variable operations that generate values +/// used in the loop itself. A dead instruction has a definition with +/// no uses, or uses that occur in the original loop only. +void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs) { + // For each epilog block, check that the value defined by each instruction + // is used. If not, delete it. + for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), + MBE = EpilogBBs.rend(); + MBB != MBE; ++MBB) + for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), + ME = (*MBB)->instr_rend(); + MI != ME;) { + // From DeadMachineInstructionElem. Don't delete inline assembly. + if (MI->isInlineAsm()) { + ++MI; + continue; + } + bool SawStore = false; + // Check if it's safe to remove the instruction due to side effects. + // We can, and want to, remove Phis here. + if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { + ++MI; + continue; + } + bool used = true; + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + unsigned reg = MOI->getReg(); + unsigned realUses = 0; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + // Check if there are any uses that occur only in the original + // loop. If so, that's not a real use. + if (UI->getParent()->getParent() != BB) { + realUses++; + used = true; + break; + } + } + if (realUses > 0) + break; + used = false; + } + if (!used) { + MI++->eraseFromParent(); + continue; + } + ++MI; + } + // In the kernel block, check if we can remove a Phi that generates a value + // used in an instruction removed in the epilog block. 
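// The removal test used above reduces to this check (plain C++ sketch,
// illustrative types): a cloned definition survives only if it has a use
// outside the original loop block; uses left in the soon-to-be-deleted
// original block do not count.
#include <vector>

struct RegUse { const void *ParentBlock; };

static bool hasRealUse(const std::vector<RegUse> &Uses,
                       const void *OriginalLoopBlock) {
  for (const RegUse &U : Uses)
    if (U.ParentBlock != OriginalLoopBlock)
      return true; // a genuine use outside the original loop
  return false;    // only stale uses remain, so the definition is dead
}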
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), + BBE = KernelBB->getFirstNonPHI(); + BBI != BBE;) { + MachineInstr *MI = &*BBI; + ++BBI; + unsigned reg = MI->getOperand(0).getReg(); + if (MRI.use_begin(reg) == MRI.use_end()) { + MI->eraseFromParent(); + } + } +} + +/// For loop carried definitions, we split the lifetime of a virtual register +/// that has uses past the definition in the next iteration. A copy with a new +/// virtual register is inserted before the definition, which helps with +/// generating a better register assignment. +/// +/// v1 = phi(a, v2) v1 = phi(a, v2) +/// v2 = phi(b, v3) v2 = phi(b, v3) +/// v3 = .. v4 = copy v1 +/// .. = V1 v3 = .. +/// .. = v4 +void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, + SMSchedule &Schedule) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), + BBF = KernelBB->getFirstNonPHI(); + BBI != BBF; ++BBI) { + unsigned Def = BBI->getOperand(0).getReg(); + // Check for any Phi definition that used as an operand of another Phi + // in the same block. + for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), + E = MRI.use_instr_end(); + I != E; ++I) { + if (I->isPHI() && I->getParent() == KernelBB) { + // Get the loop carried definition. + unsigned LCDef = getLoopPhiReg(*BBI, KernelBB); + if (!LCDef) + continue; + MachineInstr *MI = MRI.getVRegDef(LCDef); + if (!MI || MI->getParent() != KernelBB || MI->isPHI()) + continue; + // Search through the rest of the block looking for uses of the Phi + // definition. If one occurs, then split the lifetime. + unsigned SplitReg = 0; + for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), + KernelBB->instr_end())) + if (BBJ.readsRegister(Def)) { + // We split the lifetime when we find the first use. + if (SplitReg == 0) { + SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); + BuildMI(*KernelBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), SplitReg) + .addReg(Def); + } + BBJ.substituteRegister(Def, SplitReg, 0, *TRI); + } + if (!SplitReg) + continue; + // Search through each of the epilog blocks for any uses to be renamed. + for (auto &Epilog : EpilogBBs) + for (auto &I : *Epilog) + if (I.readsRegister(Def)) + I.substituteRegister(Def, SplitReg, 0, *TRI); + break; + } + } + } +} + +/// Remove the incoming block from the Phis in a basic block. +static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { + for (MachineInstr &MI : *BB) { + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) + if (MI.getOperand(i + 1).getMBB() == Incoming) { + MI.RemoveOperand(i + 1); + MI.RemoveOperand(i); + break; + } + } +} + +/// Create branches from each prolog basic block to the appropriate epilog +/// block. These edges are needed if the loop ends before reaching the +/// kernel. +void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, + SMSchedule &Schedule, ValueMapTy *VRMap) { + assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); + MachineInstr *IndVar = Pass.LI.LoopInductionVar; + MachineInstr *Cmp = Pass.LI.LoopCompare; + MachineBasicBlock *LastPro = KernelBB; + MachineBasicBlock *LastEpi = KernelBB; + + // Start from the blocks connected to the kernel and work "out" + // to the first prolog and the last epilog blocks. 
+ SmallVector<MachineInstr *, 4> PrevInsts; + unsigned MaxIter = PrologBBs.size() - 1; + unsigned LC = UINT_MAX; + unsigned LCMin = UINT_MAX; + for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { + // Add branches to the prolog that go to the corresponding + // epilog, and the fall-thru prolog/kernel block. + MachineBasicBlock *Prolog = PrologBBs[j]; + MachineBasicBlock *Epilog = EpilogBBs[i]; + // We've executed one iteration, so decrement the loop count and check for + // the loop end. + SmallVector<MachineOperand, 4> Cond; + // Check if the LOOP0 has already been removed. If so, then there is no need + // to reduce the trip count. + if (LC != 0) + LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j, + MaxIter); + + // Record the value of the first trip count, which is used to determine if + // branches and blocks can be removed for constant trip counts. + if (LCMin == UINT_MAX) + LCMin = LC; + + unsigned numAdded = 0; + if (TargetRegisterInfo::isVirtualRegister(LC)) { + Prolog->addSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); + } else if (j >= LCMin) { + Prolog->addSuccessor(Epilog); + Prolog->removeSuccessor(LastPro); + LastEpi->removeSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); + removePhis(Epilog, LastEpi); + // Remove the blocks that are no longer referenced. + if (LastPro != LastEpi) { + LastEpi->clear(); + LastEpi->eraseFromParent(); + } + LastPro->clear(); + LastPro->eraseFromParent(); + } else { + numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); + removePhis(Epilog, Prolog); + } + LastPro = Prolog; + LastEpi = Epilog; + for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), + E = Prolog->instr_rend(); + I != E && numAdded > 0; ++I, --numAdded) + updateInstruction(&*I, false, j, 0, Schedule, VRMap); + } +} + +/// Return true if we can compute the amount the instruction changes +/// during each iteration. Set Delta to the amount of the change. +bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BaseReg; + int64_t Offset; + if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + return false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Check if there is a Phi. If so, get the definition in the loop. + MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); + if (BaseDef && BaseDef->isPHI()) { + BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); + BaseDef = MRI.getVRegDef(BaseReg); + } + if (!BaseDef) + return false; + + int D = 0; + if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) + return false; + + Delta = D; + return true; +} + +/// Update the memory operand with a new offset when the pipeliner +/// generates a new copy of the instruction that refers to a +/// different memory location. +void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, + MachineInstr &OldMI, unsigned Num) { + if (Num == 0) + return; + // If the instruction has memory operands, then adjust the offset + // when the instruction appears in different stages. 
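// Rough sketch of the adjustment made below (plain C++, illustrative names):
// when computeDelta shows the base register advancing by Delta bytes per
// iteration, the copy of the instruction placed Num stages away accesses
// memory Delta * Num bytes further along, so its memory operand is offset by
// that amount; when Delta is unknown the operand is widened to an unknown size
// instead.
#include <cstdint>

static int64_t adjustedOffset(int64_t OldOffset, unsigned Delta, unsigned Num) {
  return OldOffset + static_cast<int64_t>(Delta) * Num; // e.g. 0 + 4 * 2 = 8
}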
+ unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin(); + if (NumRefs == 0) + return; + MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs); + unsigned Refs = 0; + for (MachineMemOperand *MMO : NewMI.memoperands()) { + if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) || + (!MMO->getValue())) { + NewMemRefs[Refs++] = MMO; + continue; + } + unsigned Delta; + if (computeDelta(OldMI, Delta)) { + int64_t AdjOffset = Delta * Num; + NewMemRefs[Refs++] = + MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()); + } else + NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX); + } + NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs); +} + +/// Clone the instruction for the new pipelined loop and update the +/// memory operands, if needed. +MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI, + unsigned CurStageNum, + unsigned InstStageNum) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + // Check for tied operands in inline asm instructions. This should be handled + // elsewhere, but I'm not sure of the best solution. + if (OldMI->isInlineAsm()) + for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { + const auto &MO = OldMI->getOperand(i); + if (MO.isReg() && MO.isUse()) + break; + unsigned UseIdx; + if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) + NewMI->tieOperands(i, UseIdx); + } + updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); + return NewMI; +} + +/// Clone the instruction for the new pipelined loop. If needed, this +/// function updates the instruction using the values saved in the +/// InstrChanges structure. +MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI, + unsigned CurStageNum, + unsigned InstStageNum, + SMSchedule &Schedule) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(getSUnit(OldMI)); + if (It != InstrChanges.end()) { + std::pair<unsigned, int64_t> RegAndOffset = It->second; + unsigned BasePos, OffsetPos; + if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) + return nullptr; + int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); + MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); + if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum) + NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); + NewMI->getOperand(OffsetPos).setImm(NewOffset); + } + updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); + return NewMI; +} + +/// Update the machine instruction with new virtual registers. This +/// function may change the defintions and/or uses. +void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, + unsigned InstrStageNum, + SMSchedule &Schedule, + ValueMapTy *VRMap) { + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned reg = MO.getReg(); + if (MO.isDef()) { + // Create a new virtual register for the definition. + const TargetRegisterClass *RC = MRI.getRegClass(reg); + unsigned NewReg = MRI.createVirtualRegister(RC); + MO.setReg(NewReg); + VRMap[CurStageNum][reg] = NewReg; + if (LastDef) + replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); + } else if (MO.isUse()) { + MachineInstr *Def = MRI.getVRegDef(reg); + // Compute the stage that contains the last definition for instruction. 
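// Illustrative arithmetic for the use-renaming step below: if this copy of the
// user is being generated for stage CurStageNum, the user was scheduled in
// stage InstrStageNum, and its definition was scheduled in the earlier stage
// DefStageNum, then the last copy of that definition was produced
// InstrStageNum - DefStageNum stages back, so its name comes from
// VRMap[CurStageNum - (InstrStageNum - DefStageNum)].
static unsigned defLookupStage(unsigned CurStageNum, unsigned InstrStageNum,
                               int DefStageNum) {
  unsigned StageNum = CurStageNum;
  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum)
    StageNum -= InstrStageNum - DefStageNum; // step back to the defining copy
  return StageNum;
}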
+ int DefStageNum = Schedule.stageScheduled(getSUnit(Def)); + unsigned StageNum = CurStageNum; + if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { + // Compute the difference in stages between the defintion and the use. + unsigned StageDiff = (InstrStageNum - DefStageNum); + // Make an adjustment to get the last definition. + StageNum -= StageDiff; + } + if (VRMap[StageNum].count(reg)) + MO.setReg(VRMap[StageNum][reg]); + } + } +} + +/// Return the instruction in the loop that defines the register. +/// If the definition is a Phi, then follow the Phi operand to +/// the instruction in the loop. +MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { + SmallPtrSet<MachineInstr *, 8> Visited; + MachineInstr *Def = MRI.getVRegDef(Reg); + while (Def->isPHI()) { + if (!Visited.insert(Def).second) + break; + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) + if (Def->getOperand(i + 1).getMBB() == BB) { + Def = MRI.getVRegDef(Def->getOperand(i).getReg()); + break; + } + } + return Def; +} + +/// Return the new name for the value from the previous stage. +unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage, + unsigned LoopVal, unsigned LoopStage, + ValueMapTy *VRMap, + MachineBasicBlock *BB) { + unsigned PrevVal = 0; + if (StageNum > PhiStage) { + MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); + if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) + // The name is defined in the previous stage. + PrevVal = VRMap[StageNum - 1][LoopVal]; + else if (VRMap[StageNum].count(LoopVal)) + // The previous name is defined in the current stage when the instruction + // order is swapped. + PrevVal = VRMap[StageNum][LoopVal]; + else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) + // The loop value hasn't yet been scheduled. + PrevVal = LoopVal; + else if (StageNum == PhiStage + 1) + // The loop value is another phi, which has not been scheduled. + PrevVal = getInitPhiReg(*LoopInst, BB); + else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) + // The loop value is another phi, which has been scheduled. + PrevVal = + getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), + LoopStage, VRMap, BB); + } + return PrevVal; +} + +/// Rewrite the Phi values in the specified block to use the mappings +/// from the initial operand. Once the Phi is scheduled, we switch +/// to using the loop value instead of the Phi value, so those names +/// do not need to be rewritten. 
+void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, + unsigned StageNum, + SMSchedule &Schedule, + ValueMapTy *VRMap, + InstrMapTy &InstrMap) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstNonPHI(); + BBI != BBE; ++BBI) { + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*BBI, BB, InitVal, LoopVal); + unsigned PhiDef = BBI->getOperand(0).getReg(); + + unsigned PhiStage = + (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); + unsigned LoopStage = + (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); + unsigned NumPhis = Schedule.getStagesForPhi(PhiDef); + if (NumPhis > StageNum) + NumPhis = StageNum; + for (unsigned np = 0; np <= NumPhis; ++np) { + unsigned NewVal = + getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); + if (!NewVal) + NewVal = InitVal; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI, + PhiDef, NewVal); + } + } +} + +/// Rewrite a previously scheduled instruction to use the register value +/// from the new instruction. Make sure the instruction occurs in the +/// basic block, and we don't change the uses in the new instruction. +void SwingSchedulerDAG::rewriteScheduledInstr( + MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap, + unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, + unsigned NewReg, unsigned PrevReg) { + bool InProlog = (CurStageNum < Schedule.getMaxStageCount()); + int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum; + // Rewrite uses that have been scheduled already to use the new + // Phi register. + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), + EI = MRI.use_end(); + UI != EI;) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + ++UI; + if (UseMI->getParent() != BB) + continue; + if (UseMI->isPHI()) { + if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) + continue; + if (getLoopPhiReg(*UseMI, BB) != OldReg) + continue; + } + InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); + assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); + SUnit *OrigMISU = getSUnit(OrigInstr->second); + int StageSched = Schedule.stageScheduled(OrigMISU); + int CycleSched = Schedule.cycleScheduled(OrigMISU); + unsigned ReplaceReg = 0; + // This is the stage for the scheduled instruction. + if (StagePhi == StageSched && Phi->isPHI()) { + int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi)); + if (PrevReg && InProlog) + ReplaceReg = PrevReg; + else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) && + (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI())) + ReplaceReg = PrevReg; + else + ReplaceReg = NewReg; + } + // The scheduled instruction occurs before the scheduled Phi, and the + // Phi is not loop carried. + if (!InProlog && StagePhi + 1 == StageSched && + !Schedule.isLoopCarried(this, *Phi)) + ReplaceReg = NewReg; + if (StagePhi > StageSched && Phi->isPHI()) + ReplaceReg = NewReg; + if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) + ReplaceReg = NewReg; + if (ReplaceReg) { + MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); + UseOp.setReg(ReplaceReg); + } + } +} + +/// Check if we can change the instruction to use an offset value from the +/// previous iteration. If so, return true and set the base and offset values +/// so that we can rewrite the load, if necessary. 
+/// v1 = Phi(v0, v3) +/// v2 = load v1, 0 +/// v3 = post_store v1, 4, x +/// This function enables the load to be rewritten as v2 = load v3, 4. +bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, + unsigned &BasePos, + unsigned &OffsetPos, + unsigned &NewBase, + int64_t &Offset) { + // Get the load instruction. + if (TII->isPostIncrement(*MI)) + return false; + unsigned BasePosLd, OffsetPosLd; + if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd)) + return false; + unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); + + // Look for the Phi instruction. + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineInstr *Phi = MRI.getVRegDef(BaseReg); + if (!Phi || !Phi->isPHI()) + return false; + // Get the register defined in the loop block. + unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent()); + if (!PrevReg) + return false; + + // Check for the post-increment load/store instruction. + MachineInstr *PrevDef = MRI.getVRegDef(PrevReg); + if (!PrevDef || PrevDef == MI) + return false; + + if (!TII->isPostIncrement(*PrevDef)) + return false; + + unsigned BasePos1 = 0, OffsetPos1 = 0; + if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1)) + return false; + + // Make sure offset values are both positive or both negative. + int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm(); + int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm(); + if ((LoadOffset >= 0) != (StoreOffset >= 0)) + return false; + + // Set the return value once we determine that we return true. + BasePos = BasePosLd; + OffsetPos = OffsetPosLd; + NewBase = PrevReg; + Offset = StoreOffset; + return true; +} + +/// Apply changes to the instruction if needed. The changes are need +/// to improve the scheduling and depend up on the final schedule. +MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, + SMSchedule &Schedule, + bool UpdateDAG) { + SUnit *SU = getSUnit(MI); + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(SU); + if (It != InstrChanges.end()) { + std::pair<unsigned, int64_t> RegAndOffset = It->second; + unsigned BasePos, OffsetPos; + if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) + return nullptr; + unsigned BaseReg = MI->getOperand(BasePos).getReg(); + MachineInstr *LoopDef = findDefInLoop(BaseReg); + int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); + int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef)); + int BaseStageNum = Schedule.stageScheduled(SU); + int BaseCycleNum = Schedule.cycleScheduled(SU); + if (BaseStageNum < DefStageNum) { + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + int OffsetDiff = DefStageNum - BaseStageNum; + if (DefCycleNum < BaseCycleNum) { + NewMI->getOperand(BasePos).setReg(RegAndOffset.first); + if (OffsetDiff > 0) + --OffsetDiff; + } + int64_t NewOffset = + MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff; + NewMI->getOperand(OffsetPos).setImm(NewOffset); + if (UpdateDAG) { + SU->setInstr(NewMI); + MISUnitMap[NewMI] = SU; + } + NewMIs.insert(NewMI); + return NewMI; + } + } + return nullptr; +} + +/// Return true for an order dependence that is loop carried potentially. +/// An order dependence is loop carried if the destination defines a value +/// that may be used by the source in a subsequent iteration. 
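The function below prunes an order dependence when both accesses use the same base register, that base advances by a fixed delta each iteration, and neither access reaches past the increment. A standalone restatement of its closing offset test, with made-up numbers rather than values from the diff:

#include <cassert>
#include <cstdint>

// Mirrors the final check of isLoopCarriedOrder: the access with the larger
// offset can only touch another iteration's data if it extends past the
// per-iteration increment of the shared base register.
bool mayBeLoopCarried(int64_t OffsetS, uint64_t SizeS, uint64_t DeltaS,
                      int64_t OffsetD, uint64_t SizeD, uint64_t DeltaD) {
  if (OffsetS >= OffsetD)
    return OffsetS + (int64_t)SizeS > (int64_t)DeltaS;
  return OffsetD + (int64_t)SizeD > (int64_t)DeltaD;
}

int main() {
  // Base advances by 8 per iteration; a 4-byte access at offset 4 and a
  // 4-byte access at offset 0 never overlap across iterations: 4 + 4 <= 8.
  assert(!mayBeLoopCarried(4, 4, 8, 0, 4, 8));
  // With the base advancing by only 4, iteration i's access at offset 0 hits
  // the bytes iteration i-1 touched at offset 4, so the dependence may be
  // loop carried: 4 + 4 > 4.
  assert(mayBeLoopCarried(4, 4, 4, 0, 4, 4));
  return 0;
}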
+bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
+                                           bool isSucc) {
+  if (!isOrder(Source, Dep) || Dep.isArtificial())
+    return false;
+
+  if (!SwpPruneLoopCarried)
+    return true;
+
+  MachineInstr *SI = Source->getInstr();
+  MachineInstr *DI = Dep.getSUnit()->getInstr();
+  if (!isSucc)
+    std::swap(SI, DI);
+  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
+
+  // Assume ordered loads and stores may have a loop carried dependence.
+  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+      SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
+    return true;
+
+  // Only chain dependences between a load and store can be loop carried.
+  if (!DI->mayStore() || !SI->mayLoad())
+    return false;
+
+  unsigned DeltaS, DeltaD;
+  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
+    return true;
+
+  unsigned BaseRegS, BaseRegD;
+  int64_t OffsetS, OffsetD;
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
+      !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+    return true;
+
+  if (BaseRegS != BaseRegD)
+    return true;
+
+  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
+  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
+
+  // This is the main test, which checks the offset values and the loop
+  // increment value to determine if the accesses may be loop carried.
+  if (OffsetS >= OffsetD)
+    return OffsetS + AccessSizeS > DeltaS;
+  else if (OffsetS < OffsetD)
+    return OffsetD + AccessSizeD > DeltaD;
+
+  return true;
+}
+
+void SwingSchedulerDAG::postprocessDAG() {
+  for (auto &M : Mutations)
+    M->apply(this);
+}
+
+/// Try to schedule the node at the specified StartCycle and continue
+/// until the node is scheduled or the EndCycle is reached. This function
+/// returns true if the node is scheduled. This routine may search either
+/// forward or backward for a place to insert the instruction based upon
+/// the relative values of StartCycle and EndCycle.
+bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
+  bool forward = true;
+  if (StartCycle > EndCycle)
+    forward = false;
+
+  // The terminating condition depends on the direction.
+  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
+  for (int curCycle = StartCycle; curCycle != termCycle;
+       forward ? ++curCycle : --curCycle) {
+
+    // Add the already scheduled instructions at the specified cycle to the DFA.
+ Resources->clearResources(); + for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II); + checkCycle <= LastCycle; checkCycle += II) { + std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle]; + + for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(), + E = cycleInstrs.end(); + I != E; ++I) { + if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode())) + continue; + assert(Resources->canReserveResources(*(*I)->getInstr()) && + "These instructions have already been scheduled."); + Resources->reserveResources(*(*I)->getInstr()); + } + } + if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) || + Resources->canReserveResources(*SU->getInstr())) { + DEBUG({ + dbgs() << "\tinsert at cycle " << curCycle << " "; + SU->getInstr()->dump(); + }); + + ScheduledInstrs[curCycle].push_back(SU); + InstrToCycle.insert(std::make_pair(SU, curCycle)); + if (curCycle > LastCycle) + LastCycle = curCycle; + if (curCycle < FirstCycle) + FirstCycle = curCycle; + return true; + } + DEBUG({ + dbgs() << "\tfailed to insert at cycle " << curCycle << " "; + SU->getInstr()->dump(); + }); + } + return false; +} + +// Return the cycle of the earliest scheduled instruction in the chain. +int SMSchedule::earliestCycleInChain(const SDep &Dep) { + SmallPtrSet<SUnit *, 8> Visited; + SmallVector<SDep, 8> Worklist; + Worklist.push_back(Dep); + int EarlyCycle = INT_MAX; + while (!Worklist.empty()) { + const SDep &Cur = Worklist.pop_back_val(); + SUnit *PrevSU = Cur.getSUnit(); + if (Visited.count(PrevSU)) + continue; + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU); + if (it == InstrToCycle.end()) + continue; + EarlyCycle = std::min(EarlyCycle, it->second); + for (const auto &PI : PrevSU->Preds) + if (SwingSchedulerDAG::isOrder(PrevSU, PI)) + Worklist.push_back(PI); + Visited.insert(PrevSU); + } + return EarlyCycle; +} + +// Return the cycle of the latest scheduled instruction in the chain. +int SMSchedule::latestCycleInChain(const SDep &Dep) { + SmallPtrSet<SUnit *, 8> Visited; + SmallVector<SDep, 8> Worklist; + Worklist.push_back(Dep); + int LateCycle = INT_MIN; + while (!Worklist.empty()) { + const SDep &Cur = Worklist.pop_back_val(); + SUnit *SuccSU = Cur.getSUnit(); + if (Visited.count(SuccSU)) + continue; + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU); + if (it == InstrToCycle.end()) + continue; + LateCycle = std::max(LateCycle, it->second); + for (const auto &SI : SuccSU->Succs) + if (SwingSchedulerDAG::isOrder(SuccSU, SI)) + Worklist.push_back(SI); + Visited.insert(SuccSU); + } + return LateCycle; +} + +/// If an instruction has a use that spans multiple iterations, then +/// return true. These instructions are characterized by having a back-ege +/// to a Phi, which contains a reference to another Phi. +static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { + for (auto &P : SU->Preds) + if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI()) + for (auto &S : P.getSUnit()->Succs) + if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI()) + return P.getSUnit(); + return nullptr; +} + +/// Compute the scheduling start slot for the instruction. The start slot +/// depends on any predecessor or successor nodes scheduled already. +void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, + int *MinEnd, int *MaxStart, int II, + SwingSchedulerDAG *DAG) { + // Iterate over each instruction that has been scheduled already. 
The start + // slot computuation depends on whether the previously scheduled instruction + // is a predecessor or successor of the specified instruction. + for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) { + + // Iterate over each instruction in the current cycle. + for (SUnit *I : getInstructions(cycle)) { + // Because we're processing a DAG for the dependences, we recognize + // the back-edge in recurrences by anti dependences. + for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) { + const SDep &Dep = SU->Preds[i]; + if (Dep.getSUnit() == I) { + if (!DAG->isBackedge(SU, Dep)) { + int EarlyStart = cycle + DAG->getLatency(SU, Dep) - + DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; + *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); + if (DAG->isLoopCarriedOrder(SU, Dep, false)) { + int End = earliestCycleInChain(Dep) + (II - 1); + *MinEnd = std::min(*MinEnd, End); + } + } else { + int LateStart = cycle - DAG->getLatency(SU, Dep) + + DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; + *MinLateStart = std::min(*MinLateStart, LateStart); + } + } + // For instruction that requires multiple iterations, make sure that + // the dependent instruction is not scheduled past the definition. + SUnit *BE = multipleIterations(I, DAG); + if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() && + !SU->isPred(I)) + *MinLateStart = std::min(*MinLateStart, cycle); + } + for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) + if (SU->Succs[i].getSUnit() == I) { + const SDep &Dep = SU->Succs[i]; + if (!DAG->isBackedge(SU, Dep)) { + int LateStart = cycle - DAG->getLatency(SU, Dep) + + DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; + *MinLateStart = std::min(*MinLateStart, LateStart); + if (DAG->isLoopCarriedOrder(SU, Dep)) { + int Start = latestCycleInChain(Dep) + 1 - II; + *MaxStart = std::max(*MaxStart, Start); + } + } else { + int EarlyStart = cycle + DAG->getLatency(SU, Dep) - + DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; + *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); + } + } + } + } +} + +/// Order the instructions within a cycle so that the definitions occur +/// before the uses. Returns true if the instruction is added to the start +/// of the list, or false if added to the end. +bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, + std::deque<SUnit *> &Insts) { + MachineInstr *MI = SU->getInstr(); + bool OrderBeforeUse = false; + bool OrderAfterDef = false; + bool OrderBeforeDef = false; + unsigned MoveDef = 0; + unsigned MoveUse = 0; + int StageInst1 = stageScheduled(SU); + + unsigned Pos = 0; + for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; + ++I, ++Pos) { + // Relative order of Phis does not matter. 
+ if (MI->isPHI() && (*I)->getInstr()->isPHI()) + continue; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned Reg = MO.getReg(); + unsigned BasePos, OffsetPos; + if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) + if (MI->getOperand(BasePos).getReg() == Reg) + if (unsigned NewReg = SSD->getInstrBaseReg(SU)) + Reg = NewReg; + bool Reads, Writes; + std::tie(Reads, Writes) = + (*I)->getInstr()->readsWritesVirtualRegister(Reg); + if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) { + OrderBeforeUse = true; + MoveUse = Pos; + } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) { + // Add the instruction after the scheduled instruction. + OrderAfterDef = true; + MoveDef = Pos; + } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) { + if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) { + OrderBeforeUse = true; + MoveUse = Pos; + } else { + OrderAfterDef = true; + MoveDef = Pos; + } + } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) { + OrderBeforeUse = true; + MoveUse = Pos; + if (MoveUse != 0) { + OrderAfterDef = true; + MoveDef = Pos - 1; + } + } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) { + // Add the instruction before the scheduled instruction. + OrderBeforeUse = true; + MoveUse = Pos; + } else if (MO.isUse() && stageScheduled(*I) == StageInst1 && + isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) { + OrderBeforeDef = true; + MoveUse = Pos; + } + } + // Check for order dependences between instructions. Make sure the source + // is ordered before the destination. + for (auto &S : SU->Succs) + if (S.getKind() == SDep::Order) { + if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) { + OrderBeforeUse = true; + MoveUse = Pos; + } + } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) { + if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) { + if (S.isAssignedRegDep()) { + OrderAfterDef = true; + MoveDef = Pos; + } + } else { + OrderBeforeUse = true; + MoveUse = Pos; + } + } + for (auto &P : SU->Preds) + if (P.getKind() == SDep::Order) { + if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) { + OrderAfterDef = true; + MoveDef = Pos; + } + } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) { + if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) { + if (P.isAssignedRegDep()) { + OrderBeforeUse = true; + MoveUse = Pos; + } + } else { + OrderAfterDef = true; + MoveDef = Pos; + } + } + } + + // A circular dependence. + if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef) + OrderBeforeUse = false; + + // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due + // to a loop-carried dependence. + if (OrderBeforeDef) + OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef); + + // The uncommon case when the instruction order needs to be updated because + // there is both a use and def. 
+ if (OrderBeforeUse && OrderAfterDef) { + SUnit *UseSU = Insts.at(MoveUse); + SUnit *DefSU = Insts.at(MoveDef); + if (MoveUse > MoveDef) { + Insts.erase(Insts.begin() + MoveUse); + Insts.erase(Insts.begin() + MoveDef); + } else { + Insts.erase(Insts.begin() + MoveDef); + Insts.erase(Insts.begin() + MoveUse); + } + if (orderDependence(SSD, UseSU, Insts)) { + Insts.push_front(SU); + orderDependence(SSD, DefSU, Insts); + return true; + } + Insts.pop_back(); + Insts.push_back(SU); + Insts.push_back(UseSU); + orderDependence(SSD, DefSU, Insts); + return false; + } + // Put the new instruction first if there is a use in the list. Otherwise, + // put it at the end of the list. + if (OrderBeforeUse) + Insts.push_front(SU); + else + Insts.push_back(SU); + return OrderBeforeUse; +} + +/// Return true if the scheduled Phi has a loop carried operand. +bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) { + if (!Phi.isPHI()) + return false; + assert(Phi.isPHI() && "Expecing a Phi."); + SUnit *DefSU = SSD->getSUnit(&Phi); + unsigned DefCycle = cycleScheduled(DefSU); + int DefStage = stageScheduled(DefSU); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal); + SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal)); + if (!UseSU) + return true; + if (UseSU->getInstr()->isPHI()) + return true; + unsigned LoopCycle = cycleScheduled(UseSU); + int LoopStage = stageScheduled(UseSU); + return (LoopCycle > DefCycle) || (LoopStage <= DefStage); +} + +/// Return true if the instruction is a definition that is loop carried +/// and defines the use on the next iteration. +/// v1 = phi(v2, v3) +/// (Def) v3 = op v1 +/// (MO) = v1 +/// If MO appears before Def, then then v1 and v3 may get assigned to the same +/// register. +bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, + MachineInstr *Def, MachineOperand &MO) { + if (!MO.isReg()) + return false; + if (Def->isPHI()) + return false; + MachineInstr *Phi = MRI.getVRegDef(MO.getReg()); + if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent()) + return false; + if (!isLoopCarried(SSD, *Phi)) + return false; + unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent()); + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + MachineOperand &DMO = Def->getOperand(i); + if (!DMO.isReg() || !DMO.isDef()) + continue; + if (DMO.getReg() == LoopReg) + return true; + } + return false; +} + +// Check if the generated schedule is valid. This function checks if +// an instruction that uses a physical register is scheduled in a +// different stage than the definition. The pipeliner does not handle +// physical register values that may cross a basic block boundary. +bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { + for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) { + SUnit &SU = SSD->SUnits[i]; + if (!SU.hasPhysRegDefs) + continue; + int StageDef = stageScheduled(&SU); + assert(StageDef != -1 && "Instruction should have been scheduled."); + for (auto &SI : SU.Succs) + if (SI.isAssignedRegDep()) + if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg())) + if (stageScheduled(SI.getSUnit()) != StageDef) + return false; + } + return true; +} + +/// After the schedule has been formed, call this function to combine +/// the instructions from the different stages/cycles. That is, this +/// function creates a schedule that represents a single iteration. 
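finalizeSchedule below folds later stages back onto the first II cycles, so an instruction placed at absolute cycle C ends up in the kernel cycle given by C modulo the initiation interval (relative to the first cycle); the instructions within each kernel cycle are then re-ordered separately. A toy illustration of that folding, with an assumed II of 2 and made-up instruction names:

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  const int II = 2; // assumed initiation interval
  // (absolute cycle, instruction) pairs as the scheduler might produce them;
  // cycles 0-1 belong to stage 0, cycles 2-3 to stage 1.
  std::vector<std::pair<int, std::string>> Sched = {
      {0, "load"}, {1, "mul"}, {2, "add"}, {3, "store"}};
  std::map<int, std::vector<std::string>> Kernel;
  for (const auto &P : Sched)
    Kernel[P.first % II].push_back(P.second); // fold the stages together
  for (const auto &C : Kernel) {
    std::printf("kernel cycle %d:", C.first);
    for (const auto &Name : C.second)
      std::printf(" %s", Name.c_str());
    std::printf("\n");
  }
  // Prints:
  //   kernel cycle 0: load add
  //   kernel cycle 1: mul store
  return 0;
}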
+void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { + // Move all instructions to the first stage from later stages. + for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) { + for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage; + ++stage) { + std::deque<SUnit *> &cycleInstrs = + ScheduledInstrs[cycle + (stage * InitiationInterval)]; + for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(), + E = cycleInstrs.rend(); + I != E; ++I) + ScheduledInstrs[cycle].push_front(*I); + } + } + // Iterate over the definitions in each instruction, and compute the + // stage difference for each use. Keep the maximum value. + for (auto &I : InstrToCycle) { + int DefStage = stageScheduled(I.first); + MachineInstr *MI = I.first->getInstr(); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || !Op.isDef()) + continue; + + unsigned Reg = Op.getReg(); + unsigned MaxDiff = 0; + bool PhiIsSwapped = false; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + SUnit *SUnitUse = SSD->getSUnit(UseMI); + int UseStage = stageScheduled(SUnitUse); + unsigned Diff = 0; + if (UseStage != -1 && UseStage >= DefStage) + Diff = UseStage - DefStage; + if (MI->isPHI()) { + if (isLoopCarried(SSD, *MI)) + ++Diff; + else + PhiIsSwapped = true; + } + MaxDiff = std::max(Diff, MaxDiff); + } + RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); + } + } + + // Erase all the elements in the later stages. Only one iteration should + // remain in the scheduled list, and it contains all the instructions. + for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle) + ScheduledInstrs.erase(cycle); + + // Change the registers in instruction as specified in the InstrChanges + // map. We need to use the new registers to create the correct order. + for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) { + SUnit *SU = &SSD->SUnits[i]; + SSD->applyInstrChange(SU->getInstr(), *this, true); + } + + // Reorder the instructions in each cycle to fix and improve the + // generated code. + for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) { + std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle]; + std::deque<SUnit *> newOrderZC; + // Put the zero-cost, pseudo instructions at the start of the cycle. + for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { + SUnit *SU = cycleInstrs[i]; + if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) + orderDependence(SSD, SU, newOrderZC); + } + std::deque<SUnit *> newOrderI; + // Then, add the regular instructions back. + for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { + SUnit *SU = cycleInstrs[i]; + if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) + orderDependence(SSD, SU, newOrderI); + } + // Replace the old order with the new order. + cycleInstrs.swap(newOrderZC); + cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end()); + } + + DEBUG(dump();); +} + +/// Print the schedule information to the given output. +void SMSchedule::print(raw_ostream &os) const { + // Iterate over each cycle. + for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) { + // Iterate over each instruction in the cycle. 
+ const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle); + for (SUnit *CI : cycleInstrs->second) { + os << "cycle " << cycle << " (" << stageScheduled(CI) << ") "; + os << "(" << CI->NodeNum << ") "; + CI->getInstr()->print(os); + os << "\n"; + } + } +} + +/// Utility function used for debugging to print the schedule. +void SMSchedule::dump() const { print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 613598d..242cb0b 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -21,11 +21,16 @@ using namespace llvm; +static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden, + cl::init(true), cl::desc("Enable subregister liveness tracking.")); + // Pin the vtable to this file. void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) - : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) { + : MF(MF), TheDelegate(nullptr), + TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && + EnableSubRegLiveness) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); @@ -88,6 +93,13 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { return true; } +unsigned MachineRegisterInfo::createIncompleteVirtualRegister() { + unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + VRegInfo.grow(Reg); + RegAllocHints.grow(Reg); + return Reg; +} + /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. /// @@ -98,41 +110,42 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ "Virtual register RegClass must be allocatable."); // New virtual register number. - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - VRegInfo.grow(Reg); + unsigned Reg = createIncompleteVirtualRegister(); VRegInfo[Reg].first = RegClass; - RegAllocHints.grow(Reg); if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } -unsigned -MachineRegisterInfo::getSize(unsigned VReg) const { - VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg); - return SizeIt != getVRegToSize().end() ? SizeIt->second : 0; +LLT MachineRegisterInfo::getType(unsigned VReg) const { + VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg); + return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{}; } -void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) { - getVRegToSize()[VReg] = Size; +void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { + // Check that VReg doesn't have a class. + assert((getRegClassOrRegBank(VReg).isNull() || + !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) && + "Can't set the size of a non-generic virtual register"); + getVRegToType()[VReg] = Ty; } unsigned -MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) { - assert(Size && "Cannot create empty virtual register"); - +MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) { // New virtual register number. - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - VRegInfo.grow(Reg); + unsigned Reg = createIncompleteVirtualRegister(); // FIXME: Should we use a dummy register class? 
- VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr); - getVRegToSize()[Reg] = Size; - RegAllocHints.grow(Reg); + VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr); + getVRegToType()[Reg] = Ty; if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } +void MachineRegisterInfo::clearVirtRegTypes() { + getVRegToType().clear(); +} + /// clearVirtRegs - Remove all virtual registers (after physreg assignment). void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG @@ -444,13 +457,16 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { "Invalid ReservedRegs vector from target"); } -bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, - const MachineFunction &MF) const { +bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + if (TRI->isConstantPhysReg(PhysReg)) + return true; + // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 47ad60c..e9b4755 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/AlignOf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index d921e29..e06bc51 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -230,11 +230,6 @@ static cl::opt<bool> EnablePostRAMachineSched( cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden); -/// Forward declare the standard machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); -static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); - /// Decrement this iterator until reaching the top or a non-debug instr. static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, @@ -251,8 +246,8 @@ priorNonDebug(MachineBasicBlock::const_iterator I, static MachineBasicBlock::iterator priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::const_iterator Beg) { - return const_cast<MachineInstr*>( - &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); + return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg) + .getNonConstIterator(); } /// If this iterator is a debug value, increment until reaching the End or a @@ -271,12 +266,8 @@ nextIfDebug(MachineBasicBlock::const_iterator I, static MachineBasicBlock::iterator nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::const_iterator End) { - // Cast the return value to nonconst MachineInstr, then cast to an - // instr_iterator, which does not check for null, finally return a - // bundle_iterator. 
- return MachineBasicBlock::instr_iterator( - const_cast<MachineInstr*>( - &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); + return nextIfDebug(MachineBasicBlock::const_iterator(I), End) + .getNonConstIterator(); } /// Instantiate a ScheduleDAGInstrs that will be owned by the caller. @@ -458,9 +449,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for (;I != MBB->begin(); --I) { - if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) + MachineInstr &MI = *std::prev(I); + if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!I->isDebugValue()) + if (!MI.isDebugValue()) ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling @@ -692,8 +684,14 @@ void ScheduleDAGMI::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + if (EntrySU.getInstr() != nullptr) + EntrySU.dumpAll(this); + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this); + if (ExitSU.getInstr() != nullptr) + ExitSU.dumpAll(this); + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -862,6 +860,44 @@ ScheduleDAGMILive::~ScheduleDAGMILive() { delete DFSResult; } +void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { + const MachineInstr &MI = *SU.getInstr(); + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Ignore re-defs. + if (TrackLaneMasks) { + bool FoundDef = false; + for (const MachineOperand &MO2 : MI.operands()) { + if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { + FoundDef = true; + break; + } + } + if (FoundDef) + continue; + } + + // Record this local VReg use. + VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == &SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU)); + } +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. [begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions @@ -889,6 +925,11 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { + VRegUses.clear(); + VRegUses.setUniverse(MRI.getNumVirtRegs()); + for (SUnit &SU : SUnits) + collectVRegUses(SU); + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, ShouldTrackLaneMasks, false); BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, @@ -999,7 +1040,7 @@ void ScheduleDAGMILive::updatePressureDiffs( // this fact anymore => decrement pressure. // If the register has just become dead then other uses make it come // back to life => increment pressure. 
- bool Decrement = P.LaneMask != 0; + bool Decrement = P.LaneMask.any(); for (const VReg2SUnit &V2SU : make_range(VRegUses.find(Reg), VRegUses.end())) { @@ -1018,7 +1059,7 @@ void ScheduleDAGMILive::updatePressureDiffs( ); } } else { - assert(P.LaneMask != 0); + assert(P.LaneMask.any()); DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); // This may be called before CurrentBottom has been initialized. However, // BotRPTracker must have a valid position. We want the value live into the @@ -1087,6 +1128,8 @@ void ScheduleDAGMILive::schedule() { SchedImpl->initialize(this); DEBUG( + if (EntrySU.getInstr() != nullptr) + EntrySU.dumpAll(this); for (const SUnit &SU : SUnits) { SU.dumpAll(this); if (ShouldTrackPressure) { @@ -1095,6 +1138,8 @@ void ScheduleDAGMILive::schedule() { } dbgs() << '\n'; } + if (ExitSU.getInstr() != nullptr) + ExitSU.dumpAll(this); ); if (ViewMISchedDAGs) viewGraph(); @@ -1362,7 +1407,8 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { : SU(su), BaseReg(reg), Offset(ofs) {} bool operator<(const MemOpInfo&RHS) const { - return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset); + return std::tie(BaseReg, Offset, SU->NodeNum) < + std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum); } }; @@ -1395,6 +1441,24 @@ public: }; } // anonymous +namespace llvm { + +std::unique_ptr<ScheduleDAGMutation> +createLoadClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI) + : nullptr; +} + +std::unique_ptr<ScheduleDAGMutation> +createStoreClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI) + : nullptr; +} + +} // namespace llvm + void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; @@ -1487,29 +1551,23 @@ namespace { /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { const TargetInstrInfo &TII; - const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) - : TII(TII), TRI(TRI) {} + MacroFusion(const TargetInstrInfo &TII) + : TII(TII) {} void apply(ScheduleDAGInstrs *DAGInstrs) override; }; } // anonymous -/// Returns true if \p MI reads a register written by \p Other. -static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, - const MachineInstr &Other) { - for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !MO.readsReg()) - continue; +namespace llvm { - unsigned Reg = MO.getReg(); - if (Other.modifiesRegister(Reg, &TRI)) - return true; - } - return false; +std::unique_ptr<ScheduleDAGMutation> +createMacroFusionDAGMutation(const TargetInstrInfo *TII) { + return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr; } +} // namespace llvm + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { @@ -1521,16 +1579,12 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { if (!Branch) return; - for (SUnit &SU : DAG->SUnits) { - // SUnits with successors can't be schedule in front of the ExitSU. - if (!SU.Succs.empty()) - continue; - // We only care if the node writes to a register that the branch reads. 
- MachineInstr *Pred = SU.getInstr(); - if (!HasDataDep(TRI, *Branch, *Pred)) + for (SDep &PredDep : ExitSU.Preds) { + if (PredDep.isWeak()) continue; - - if (!TII.shouldScheduleAdjacent(*Pred, *Branch)) + SUnit &SU = *PredDep.getSUnit(); + MachineInstr &Pred = *SU.getInstr(); + if (!TII.shouldScheduleAdjacent(Pred, *Branch)) continue; // Create a single weak edge from SU to ExitSU. The only effect is to cause @@ -1543,6 +1597,16 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); + // Adjust latency of data deps between the nodes. + for (SDep &PredDep : ExitSU.Preds) { + if (PredDep.getSUnit() == &SU) + PredDep.setLatency(0); + } + for (SDep &SuccDep : SU.Succs) { + if (SuccDep.getSUnit() == &ExitSU) + SuccDep.setLatency(0); + } + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } @@ -1572,6 +1636,16 @@ protected: }; } // anonymous +namespace llvm { + +std::unique_ptr<ScheduleDAGMutation> +createCopyConstrainDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + return make_unique<CopyConstrain>(TII, TRI); +} + +} // namespace llvm + /// constrainLocalCopy handles two possibilities: /// 1) Local src: /// I0: = dst @@ -1760,7 +1834,6 @@ void SchedBoundary::reset() { Available.clear(); Pending.clear(); CheckPending = false; - NextSUs.clear(); CurrCycle = 0; CurrMOps = 0; MinReadyCycle = UINT_MAX; @@ -1961,23 +2034,6 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { Pending.push(SU); else Available.push(SU); - - // Record this node as an immediate dependent of the scheduled node. - NextSUs.insert(SU); -} - -void SchedBoundary::releaseTopNode(SUnit *SU) { - if (SU->isScheduled) - return; - - releaseNode(SU, SU->TopReadyCycle); -} - -void SchedBoundary::releaseBottomNode(SUnit *SU) { - if (SU->isScheduled) - return; - - releaseNode(SU, SU->BotReadyCycle); } /// Move the boundary of scheduled code by one cycle. @@ -2828,9 +2884,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, bool SameBoundary = Zone != nullptr; if (SameBoundary) { // For loops that are acyclic path limited, aggressively schedule for - // latency. This can result in very long dependence chains scheduled in - // sequence, so once every cycle (when CurrMOps == 0), switch to normal - // heuristics. + // latency. Within an single cycle, whenever CurrMOps > 0, allow normal + // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) return; @@ -2888,13 +2943,6 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) return; - // Prefer immediate defs/users of the last scheduled instruction. This is a - // local pressure avoidance strategy that also makes the machine code - // readable. - if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU), - TryCand, Cand, NextDefUse)) - return; - // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { @@ -3105,28 +3153,24 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. 
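The hunks above turn the built-in DAG mutations into factory functions, and the hunk below exports createGenericSchedLive, so a target can assemble its own scheduler from the pieces. A rough sketch of such a hook; the function name and the surrounding target file are assumptions, only the factory calls come from this patch, and this fragment is not self-contained:

// Hypothetical fragment of a target's TargetMachine implementation.
static ScheduleDAGInstrs *createMyTargetSchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // The factories return nullptr when their cl::opt is disabled, so guard the
  // registration rather than assuming addMutation accepts a null mutation.
  if (auto LoadCluster = createLoadClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(LoadCluster));
  if (auto StoreCluster = createStoreClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(StoreCluster));
  if (auto Fusion = createMacroFusionDAGMutation(DAG->TII))
    DAG->addMutation(std::move(Fusion));
  return DAG;
}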
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { +ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. - DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI)); - if (EnableMemOpCluster) { - if (DAG->TII->enableClusterLoads()) - DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI)); - if (DAG->TII->enableClusterStores()) - DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI)); - } - if (EnableMacroFusion) - DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI)); + DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; } +static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { + return createGenericSchedLive(C); +} + static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createGenericSchedLive); + createConveringSched); //===----------------------------------------------------------------------===// // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. @@ -3257,9 +3301,9 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { Top.bumpNode(SU); } -/// Create a generic scheduler with no vreg liveness or DAG mutation passes. -static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true); +ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { + return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), + /*RemoveKillFlags=*/true); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 571a5c1..5f87b68 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -22,9 +22,15 @@ #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/LLVMContext.h" @@ -34,6 +40,13 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <map> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "machine-sink" @@ -48,12 +61,21 @@ UseBlockFreqInfo("machine-sink-bfi", cl::desc("Use block frequency info to find successors to sink"), cl::init(true), cl::Hidden); +static cl::opt<unsigned> SplitEdgeProbabilityThreshold( + "machine-sink-split-probability-threshold", + cl::desc( + "Percentage threshold for splitting single-instruction critical edge. 
" + "If the branch threshold is higher than this threshold, we allow " + "speculative execution of up to 1 instruction to avoid branching to " + "splitted critical edge"), + cl::init(40), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); namespace { + class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -62,15 +84,16 @@ namespace { MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; const MachineBlockFrequencyInfo *MBFI; + const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; // Remember which edges have been considered for breaking. - SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> + SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8> CEBCandidates; // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. - SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit; + SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit; SparseBitVector<> RegsToClearKillFlags; @@ -79,6 +102,7 @@ namespace { public: static char ID; // Pass identification + MachineSinking() : MachineFunctionPass(ID) { initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); } @@ -92,6 +116,7 @@ namespace { AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachinePostDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); @@ -143,12 +168,14 @@ namespace { GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; }; + } // end anonymous namespace char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -269,11 +296,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { PDT = &getAnalysis<MachinePostDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); bool EverMadeChange = false; - while (1) { + while (true) { bool MadeChange = false; // Process all basic blocks. @@ -369,6 +397,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; + if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <= + BranchProbability(SplitEdgeProbabilityThreshold, 100)) + return true; + // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. @@ -604,7 +636,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. 
- if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + if (!MRI->isConstantPhysReg(Reg)) return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 86332c8..ef7e525 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -430,16 +430,17 @@ public: po_iterator_storage(LoopBounds &lb) : LB(lb) {} void finishPostorder(const MachineBasicBlock*) {} - bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + bool insertEdge(Optional<const MachineBasicBlock *> From, + const MachineBasicBlock *To) { // Skip already visited To blocks. MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) return false; // From is null once when To is the trace center block. if (From) { - if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) { // Don't follow backedges, don't leave FromLoop when going upwards. - if ((LB.Downward ? To : From) == FromLoop->getHeader()) + if ((LB.Downward ? To : *From) == FromLoop->getHeader()) return false; // Don't leave FromLoop. if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index a70adb0..a98139f 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -70,6 +70,10 @@ namespace { unsigned foundErrors; + // Avoid querying the MachineFunctionProperties for each operand. + bool isFunctionRegBankSelected; + bool isFunctionSelected; + typedef SmallVector<unsigned, 16> RegVector; typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; @@ -204,16 +208,13 @@ namespace { void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); - template <typename T> void report(const char *msg, ilist_iterator<T> I) { - report(msg, &*I); - } void report(const char *msg, const MachineFunction *MF); void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); void report_context(const LiveInterval &LI) const; - void report_context(const LiveRange &LR, unsigned Reg, + void report_context(const LiveRange &LR, unsigned VRegUnit, LaneBitmask LaneMask) const; void report_context(const LiveRange::Segment &S) const; void report_context(const VNInfo &VNI) const; @@ -228,10 +229,10 @@ namespace { void checkLiveness(const MachineOperand *MO, unsigned MONum); void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned Reg, - LaneBitmask LaneMask = 0); + LaneBitmask LaneMask = LaneBitmask::getNone()); void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned Reg, - LaneBitmask LaneMask = 0); + LaneBitmask LaneMask = LaneBitmask::getNone()); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); @@ -242,11 +243,12 @@ namespace { void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, - unsigned); + LaneBitmask); void verifyLiveRangeSegment(const LiveRange&, const 
LiveRange::const_iterator I, unsigned, - unsigned); - void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0); + LaneBitmask); + void verifyLiveRange(const LiveRange&, unsigned, + LaneBitmask LaneMask = LaneBitmask::getNone()); void verifyStackFrame(); @@ -310,15 +312,12 @@ void MachineVerifier::verifySlotIndexes() const { void MachineVerifier::verifyProperties(const MachineFunction &MF) { // If a pass has introduced virtual registers without clearing the - // AllVRegsAllocated property (or set it without allocating the vregs) + // NoVRegs property (or set it without allocating the vregs) // then report an error. if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::AllVRegsAllocated) && - MRI->getNumVirtRegs()) { - report( - "Function has AllVRegsAllocated property but there are VReg operands", - &MF); - } + MachineFunctionProperties::Property::NoVRegs) && + MRI->getNumVirtRegs()) + report("Function has NoVRegs property but there are VReg operands", &MF); } unsigned MachineVerifier::verify(MachineFunction &MF) { @@ -330,6 +329,11 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); + isFunctionRegBankSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::RegBankSelected); + isFunctionSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected); + LiveVars = nullptr; LiveInts = nullptr; LiveStks = nullptr; @@ -359,7 +363,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != &*MFI) { - report("Bad instruction parent pointer", MFI); + report("Bad instruction parent pointer", &*MFI); errs() << "Instruction: " << *MBBI; continue; } @@ -381,7 +385,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { CurBundle = &*MBBI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) - report("No bundle header", MBBI); + report("No bundle header", &*MBBI); visitMachineInstrBefore(&*MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { const MachineInstr &MI = *MBBI; @@ -474,11 +478,11 @@ void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, +void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit, LaneBitmask LaneMask) const { report_context_liverange(LR); - errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; - if (LaneMask != 0) + report_context_vreg_regunit(VRegUnit); + if (LaneMask.any()) report_context_lanemask(LaneMask); } @@ -524,16 +528,6 @@ void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); regsReserved = MRI->getReservedRegs(); - // A sub-register of a reserved register is also reserved - for (int Reg = regsReserved.find_first(); Reg>=0; - Reg = regsReserved.find_next(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - // FIXME: This should probably be: - // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register"); - regsReserved.set(*SubRegs); - } - } - markReachable(&MF->front()); // Build a set of the basic blocks in the function. 
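The verifier hunks above and below show the switch from raw unsigned lane masks, compared against 0, to a LaneBitmask class with explicit queries. Roughly, the wrapper behaves like the sketch here; the real type (llvm/MC/LaneBitmask.h) carries more operations:

// Simplified sketch of the wrapper type; illustrative only.
struct LaneBitmaskSketch {
  explicit LaneBitmaskSketch(unsigned M = 0) : Mask(M) {}

  bool any() const { return Mask != 0; }   // replaces: LaneMask != 0
  bool none() const { return Mask == 0; }  // replaces: LaneMask == 0
  static LaneBitmaskSketch getNone() { return LaneBitmaskSketch(0); }

  LaneBitmaskSketch operator&(LaneBitmaskSketch Other) const {
    return LaneBitmaskSketch(Mask & Other.Mask);
  }
  LaneBitmaskSketch &operator|=(LaneBitmaskSketch Other) {
    Mask |= Other.Mask;
    return *this;
  }

  unsigned Mask;
};

With such a wrapper, a check like (MOMask & SR.LaneMask).none() reads the same as the old (MOMask & SR.LaneMask) == 0, but the mask can no longer be mixed silently with plain integers, which is what the mechanical edits in these hunks reflect.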
@@ -571,7 +565,8 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = nullptr; - if (MRI->isSSA()) { + if (!MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) { // If this block has allocatable physical registers live-in, check that // it is an entry block or landing pad. for (const auto &LI : MBB->liveins()) { @@ -746,20 +741,21 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (const auto &LI : MBB->liveins()) { - if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { - report("MBB live-in list contains non-physical register", MBB); - continue; + if (MRI->tracksLiveness()) { + for (const auto &LI : MBB->liveins()) { + if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { + report("MBB live-in list contains non-physical register", MBB); + continue; + } + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + regsLive.insert(*SubRegs); } - for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); } regsLiveInButUnused = regsLive; - const MachineFrameInfo *MFI = MF->getFrameInfo(); - assert(MFI && "Function has no frame info"); - BitVector PR = MFI->getPristineRegs(*MF); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + BitVector PR = MFI.getPristineRegs(*MF); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -850,6 +846,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumOperands() << " given.\n"; } + if (MI->isPHI() && MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoPHIs)) + report("Found PHI instruction with NoPHIs property set", MI); + // Check the tied operands. if (MI->isInlineAsm()) verifyInlineAsm(MI); @@ -879,6 +879,35 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } + // Check types. + if (isPreISelGenericOpcode(MCID.getOpcode())) { + if (isFunctionSelected) + report("Unexpected generic instruction in a Selected function", MI); + + // Generic instructions specify equality constraints between some + // of their operands. Make sure these are consistent. + SmallVector<LLT, 4> Types; + for (unsigned i = 0; i < MCID.getNumOperands(); ++i) { + if (!MCID.OpInfo[i].isGenericType()) + continue; + size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex(); + Types.resize(std::max(TypeIdx + 1, Types.size())); + + LLT OpTy = MRI->getType(MI->getOperand(i).getReg()); + if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy) + report("type mismatch in generic instruction", MI); + Types[TypeIdx] = OpTy; + } + } + + // Generic opcodes must not have physical register operands. + if (isPreISelGenericOpcode(MCID.getOpcode())) { + for (auto &Op : MI->operands()) { + if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg())) + report("Generic instruction cannot have physical register", MI); + } + } + StringRef ErrorInfo; if (!TII->verifyInstruction(*MI, ErrorInfo)) report(ErrorInfo.data(), MI); @@ -988,25 +1017,62 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); if (!RC) { // This is a generic virtual register. - // It must have a size and it must not have a SubIdx. 
- unsigned Size = MRI->getSize(Reg); - if (!Size) { - report("Generic virtual register must have a size", MO, MONum); + + // If we're post-Select, we can't have gvregs anymore. + if (isFunctionSelected) { + report("Generic virtual register invalid in a Selected function", + MO, MONum); return; } - // Make sure the register fits into its register bank if any. + + // The gvreg must have a type and it must not have a SubIdx. + LLT Ty = MRI->getType(Reg); + if (!Ty.isValid()) { + report("Generic virtual register must have a valid type", MO, + MONum); + return; + } + const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg); - if (RegBank && RegBank->getSize() < Size) { + + // If we're post-RegBankSelect, the gvreg must have a bank. + if (!RegBank && isFunctionRegBankSelected) { + report("Generic virtual register must have a bank in a " + "RegBankSelected function", + MO, MONum); + return; + } + + // Make sure the register fits into its register bank if any. + if (RegBank && Ty.isValid() && + RegBank->getSize() < Ty.getSizeInBits()) { report("Register bank is too small for virtual register", MO, MONum); errs() << "Register bank " << RegBank->getName() << " too small(" - << RegBank->getSize() << ") to fit " << Size << "-bits\n"; + << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() + << "-bits\n"; return; } if (SubIdx) { - report("Generic virtual register does not subregister index", MO, MONum); + report("Generic virtual register does not subregister index", MO, + MONum); return; } + + // If this is a target specific instruction and this operand + // has register class constraint, the virtual register must + // comply to it. + if (!isPreISelGenericOpcode(MCID.getOpcode()) && + TII->getRegClass(MCID, MONum, TRI, *MF)) { + report("Virtual register does not match instruction constraint", MO, + MONum); + errs() << "Expect register class " + << TRI->getRegClassName( + TII->getRegClass(MCID, MONum, TRI, *MF)) + << " but got nothing\n"; + return; + } + break; } if (SubIdx) { @@ -1113,7 +1179,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, LiveQueryResult LRQ = LR.Query(UseIdx); // Check if we have a segment at the use, note however that we only need one // live subregister range, the others may be dead. 
- if (!LRQ.valueIn() && LaneMask == 0) { + if (!LRQ.valueIn() && LaneMask.none()) { report("No live segment at use", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); @@ -1123,7 +1189,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, report("Live range continues after kill flag", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); report_context(UseIdx); } @@ -1138,7 +1204,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, report("Inconsistent valno->def", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); report_context(*VNI); report_context(DefIdx); @@ -1147,7 +1213,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, report("No live segment at def", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); report_context(DefIdx); } @@ -1177,7 +1243,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, report("Live range continues after dead def flag", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); } } @@ -1199,7 +1265,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && MO->isKill()) { LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + if (!is_contained(VI.Kills, MI)) report("Kill missing from LiveVariables", MO, MONum); } @@ -1225,9 +1291,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LaneBitmask MOMask = SubRegIdx != 0 ? TRI->getSubRegIndexLaneMask(SubRegIdx) : MRI->getMaxLaneMaskForVReg(Reg); - LaneBitmask LiveInMask = 0; + LaneBitmask LiveInMask; for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((MOMask & SR.LaneMask) == 0) + if ((MOMask & SR.LaneMask).none()) continue; checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask); LiveQueryResult LRQ = SR.Query(UseIdx); @@ -1235,7 +1301,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveInMask |= SR.LaneMask; } // At least parts of the register has to be live at the use. - if ((LiveInMask & MOMask) == 0) { + if ((LiveInMask & MOMask).none()) { report("No live subrange at use", MO, MONum); report_context(LI); report_context(UseIdx); @@ -1327,7 +1393,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { ? 
TRI->getSubRegIndexLaneMask(SubRegIdx) : MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((SR.LaneMask & MOMask) == 0) + if ((SR.LaneMask & MOMask).none()) continue; checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask); } @@ -1640,8 +1706,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } - if (LaneMask != 0 && - (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0) + if (LaneMask.any() && + (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none()) continue; hasDef = true; if (MOI->isEarlyClobber()) @@ -1772,15 +1838,22 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || MOI->getReg() != Reg) continue; - if (LaneMask != 0 && - (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0) - continue; + unsigned Sub = MOI->getSubReg(); + LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) + : LaneBitmask::getAll(); if (MOI->isDef()) { - if (MOI->getSubReg() != 0) + if (Sub != 0) { hasSubRegDef = true; + // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane + // mask for subregister defs. Read-undef defs will be handled by + // readsReg below. + SLM = ~SLM; + } if (MOI->isDead()) hasDeadDef = true; } + if (LaneMask.any() && (LaneMask & SLM).none()) + continue; if (MOI->readsReg()) hasRead = true; } @@ -1788,7 +1861,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Make sure that the corresponding machine operand for a "dead" live // range has the dead flag. We cannot perform this check for subregister // liveranges as partially dead values are allowed. - if (LaneMask == 0 && !hasDeadDef) { + if (LaneMask.none() && !hasDeadDef) { report("Instruction ending live segment on dead slot has no dead flag", MI); report_context(LR, Reg, LaneMask); @@ -1798,7 +1871,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!hasRead) { // When tracking subregister liveness, the main range must start new // values on partial register writes, even if there is no read. - if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() || !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); @@ -1842,7 +1915,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value if this is not a // subregister liverange. 
- if (!PVNI && LaneMask == 0) { + if (!PVNI && LaneMask.none()) { report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); @@ -1882,14 +1955,14 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); - LaneBitmask Mask = 0; + LaneBitmask Mask; LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) { + if ((Mask & SR.LaneMask).any()) { report("Lane masks of sub ranges overlap in live interval", MF); report_context(LI); } - if ((SR.LaneMask & ~MaxMask) != 0) { + if ((SR.LaneMask & ~MaxMask).any()) { report("Subrange lanemask is invalid", MF); report_context(LI); } @@ -1950,11 +2023,11 @@ void MachineVerifier::verifyStackFrame() { SmallVector<StackStateOfBB, 8> SPState; SPState.resize(MF->getNumBlockIDs()); - SmallPtrSet<const MachineBasicBlock*, 8> Reachable; + df_iterator_default_set<const MachineBasicBlock*> Reachable; // Visit the MBBs in DFS order. for (df_ext_iterator<const MachineFunction*, - SmallPtrSet<const MachineBasicBlock*, 8> > + df_iterator_default_set<const MachineBasicBlock*> > DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); DFI != DFE; ++DFI) { const MachineBasicBlock *MBB = *DFI; diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 0177e41..2a8531f 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -184,7 +184,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); PI != PE; ++PI) { MachineInstr *PhiMI = *PI; - if (&*MII == PhiMI) + if (MII == PhiMI) ++MII; PhiMI->eraseFromParent(); } diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index b8d5431..c67a25b 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -175,6 +175,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { ImpDefs.clear(); VRegPHIUseCount.clear(); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + return Changed; } diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp index 4cabc3a..4e67ff2 100644 --- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -54,6 +54,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, ++InsertPoint; } - // Make sure the copy goes after any phi nodes however. + // Make sure the copy goes after any phi nodes but before + // any debug nodes. 
return MBB->SkipPHIsAndLabels(InsertPoint); } diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp index ccdaec1..50dd44f 100644 --- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp +++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ParallelCG.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -78,7 +79,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( CodegenThreadPool.async( [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) { LLVMContext Ctx; - ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( + Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( MemoryBufferRef(StringRef(BC.data(), BC.size()), "<split-module>"), Ctx); diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp index 32468c9..ad9166f 100644 --- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp @@ -32,7 +32,7 @@ struct PatchableFunction : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &F) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; } diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 60b27dd..6d64345 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -70,17 +70,28 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <memory> #include <utility> + using namespace llvm; #define DEBUG_TYPE "peephole-opt" @@ -118,6 +129,7 @@ STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { + class ValueTrackerResult; class PeepholeOptimizer : public MachineFunctionPass { @@ -128,6 +140,7 @@ namespace { public: static char ID; // Pass identification + PeepholeOptimizer() : MachineFunctionPass(ID) { initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -390,10 +403,12 @@ namespace { /// register of the last source. 
unsigned getReg() const { return Reg; } }; -} + +} // end anonymous namespace char PeepholeOptimizer::ID = 0; char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; + INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -737,6 +752,7 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, } namespace { + /// \brief Helper class to rewrite the arguments of a copy-like instruction. class CopyRewriter { protected: @@ -820,7 +836,6 @@ public: TargetInstrInfo::RegSubRegPair Def, PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources = true) { - TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); do { ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); @@ -859,7 +874,7 @@ public: const MachineOperand &MODef = NewPHI->getOperand(0); return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - } while (1); + } while (true); return TargetInstrInfo::RegSubRegPair(0, 0); } @@ -1001,6 +1016,7 @@ public: TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); return true; } + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { if (CurrentSrcIdx != 2) return false; @@ -1141,7 +1157,8 @@ public: return true; } }; -} // End namespace. + +} // end anonymous namespace /// \brief Get the appropriated CopyRewriter for \p MI. /// \return A pointer to a dynamically allocated CopyRewriter or nullptr @@ -1523,11 +1540,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - // If we run into an instruction we can't fold across, discard - // the load candidates. - if (MI->isLoadFoldBarrier()) - FoldAsLoadDefCandidates.clear(); - if (MI->isPosition() || MI->isPHI()) continue; @@ -1571,7 +1583,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI << '\n'); NAPhysToVirtMIs.clear(); - continue; } if ((isUncoalescableCopy(*MI) && @@ -1622,8 +1633,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // earlier load into MI. if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && !FoldAsLoadDefCandidates.empty()) { + + // We visit each operand even after successfully folding a previous + // one. This allows us to fold multiple loads into a single + // instruction. We do assume that optimizeLoadInstr doesn't insert + // foldable uses earlier in the argument list. Since we don't restart + // iteration, we'd miss such cases. const MCInstrDesc &MIDesc = MI->getDesc(); - for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); + for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) { const MachineOperand &MOp = MI->getOperand(i); if (!MOp.isReg()) @@ -1650,13 +1667,23 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MRI->markUsesInDebugValueAsUndef(FoldedReg); FoldAsLoadDefCandidates.erase(FoldedReg); ++NumLoadFold; - // MI is replaced with FoldMI. + + // MI is replaced with FoldMI so we can continue trying to fold Changed = true; - break; + MI = FoldMI; } } } } + + // If we run into an instruction we can't fold across, discard + // the load candidates. Note: We might be able to fold *into* this + // instruction, so this needs to be after the folding logic. 
+ if (MI->isLoadFoldBarrier()) { + DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n"); + FoldAsLoadDefCandidates.clear(); + } + } } @@ -1688,7 +1715,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { // Bitcasts with more than one def are not supported. if (Def->getDesc().getNumDefs() != 1) return ValueTrackerResult(); - if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + const MachineOperand DefOp = Def->getOperand(DefIdx); + if (DefOp.getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of the src. // Bails as we do not support composing subregs yet. return ValueTrackerResult(); @@ -1708,6 +1736,14 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { return ValueTrackerResult(); SrcIdx = OpIdx; } + + // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY + // will break the assumed guarantees for the upper bits. + for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) { + if (UseMI.isSubregToReg()) + return ValueTrackerResult(); + } + const MachineOperand &Src = Def->getOperand(SrcIdx); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -1806,8 +1842,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { // sub-register we are tracking. const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); if (!TRI || - (TRI->getSubRegIndexLaneMask(DefSubReg) & - TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) + !(TRI->getSubRegIndexLaneMask(DefSubReg) & + TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none()) return ValueTrackerResult(); // At this point, the value is available in v0 via the same subreg // we used for Def. diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 3fce307..6081916 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -98,7 +98,7 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } bool runOnMachineFunction(MachineFunction &Fn) override; diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 20a9a39..5fca7fa 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -80,7 +80,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { MachineFunctionProperties MFP; if (UsesCalleeSaves) - MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated); + MFP.set(MachineFunctionProperties::Property::NoVRegs); return MFP; } @@ -117,6 +117,10 @@ private: // TRI->requiresFrameIndexScavenging() for the current function. bool FrameIndexVirtualScavenging; + // Flag to control whether the scavenger should be passed even though + // FrameIndexVirtualScavenging is used. + bool FrameIndexEliminationScavenging; + void calculateCallFrameInfo(MachineFunction &Fn); void calculateSaveRestoreBlocks(MachineFunction &Fn); @@ -176,6 +180,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || + TRI->requiresFrameIndexReplacementScavenging(Fn); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo @@ -221,8 +227,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } // Warn on stack size when we exceeds the given limit. - MachineFrameInfo *MFI = Fn.getFrameInfo(); - uint64_t StackSize = MFI->getStackSize(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); + uint64_t StackSize = MFI.getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { DiagnosticInfoStackSize DiagStackSize(*F, StackSize); F->getContext().diagnose(DiagStackSize); @@ -231,8 +237,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); - MFI->setSavePoint(nullptr); - MFI->setRestorePoint(nullptr); + MFI.setSavePoint(nullptr); + MFI.setRestorePoint(nullptr); return true; } @@ -242,10 +248,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { void PEI::calculateCallFrameInfo(MachineFunction &Fn) { const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool AdjustsStack = MFI->adjustsStack(); + bool AdjustsStack = MFI.adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); @@ -274,8 +280,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { AdjustsStack = true; } - MFI->setAdjustsStack(AdjustsStack); - MFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI.setAdjustsStack(AdjustsStack); + MFI.setMaxCallFrameSize(MaxCallFrameSize); for (std::vector<MachineBasicBlock::iterator>::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { @@ -293,17 +299,17 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { /// Compute the sets of entry and return blocks for saving and restoring /// callee-saved registers, and placing prolog and epilog code. void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) { - const MachineFrameInfo *MFI = Fn.getFrameInfo(); + const MachineFrameInfo &MFI = Fn.getFrameInfo(); // Even when we do not change any CSR, we still want to insert the // prologue and epilogue of the function. // So set the save points for those. // Use the points found by shrink-wrapping, if any. - if (MFI->getSavePoint()) { - SaveBlocks.push_back(MFI->getSavePoint()); - assert(MFI->getRestorePoint() && "Both restore and save must be set"); - MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + if (MFI.getSavePoint()) { + SaveBlocks.push_back(MFI.getSavePoint()); + assert(MFI.getRestorePoint() && "Both restore and save must be set"); + MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); // If RestoreBlock does not have any successor and is not a return block // then the end point is unreachable and we do not need to insert any // epilogue. 
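Aside for readers of the PrologEpilogInserter hunks (illustrative only, not part of the patch): the mechanical "MFI->" to "MFI." rewrites follow from MachineFunction::getFrameInfo() now returning a MachineFrameInfo reference rather than a pointer. A minimal sketch of the updated accessor, assuming only the interface visible in these hunks; frameSizeOf is a made-up helper name:

    // Sketch only -- getFrameInfo() now hands back a reference, so callers
    // bind a MachineFrameInfo& and use '.' rather than '->'.
    #include <cstdint>
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    static uint64_t frameSizeOf(const llvm::MachineFunction &MF) {
      const llvm::MachineFrameInfo &MFI = MF.getFrameInfo(); // was: *MF.getFrameInfo()
      return MFI.getStackSize();
    }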
@@ -340,7 +346,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, } const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); - MachineFrameInfo *MFI = F.getFrameInfo(); + MachineFrameInfo &MFI = F.getFrameInfo(); if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { // If target doesn't implement this, use generic code. @@ -379,26 +385,26 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. FrameIdx = - MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); } CS.setFrameIdx(FrameIdx); } } - MFI->setCalleeSavedInfo(CSI); + MFI.setCalleeSavedInfo(CSI); } /// Helper function to update the liveness information for the callee-saved /// registers. static void updateLiveness(MachineFunction &MF) { - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // Visited will contain all the basic blocks that are in the region // where the callee saved registers are alive: // - Anything that is not Save or Restore -> LiveThrough. @@ -409,7 +415,7 @@ static void updateLiveness(MachineFunction &MF) { SmallPtrSet<MachineBasicBlock *, 8> Visited; SmallVector<MachineBasicBlock *, 8> WorkList; MachineBasicBlock *Entry = &MF.front(); - MachineBasicBlock *Save = MFI->getSavePoint(); + MachineBasicBlock *Save = MFI.getSavePoint(); if (!Save) Save = Entry; @@ -420,7 +426,7 @@ static void updateLiveness(MachineFunction &MF) { } Visited.insert(Save); - MachineBasicBlock *Restore = MFI->getRestorePoint(); + MachineBasicBlock *Restore = MFI.getRestorePoint(); if (Restore) // By construction Restore cannot be visited, otherwise it // means there exists a path to Restore that does not go @@ -440,7 +446,7 @@ static void updateLiveness(MachineFunction &MF) { WorkList.push_back(SuccBB); } - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { for (MachineBasicBlock *MBB : Visited) { @@ -460,10 +466,10 @@ static void insertCSRSpillsAndRestores(MachineFunction &Fn, const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks) { // Get callee saved register information. - MachineFrameInfo *MFI = Fn.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - MFI->setCalleeSavedInfoValid(true); + MFI.setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) @@ -551,14 +557,14 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { // If the stack grows down, add the object size to find the lowest address. 
if (StackGrowsDown) - Offset += MFI->getObjectSize(FrameIdx); + Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI.getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -569,11 +575,11 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, if (StackGrowsDown) { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); - MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); - MFI->setObjectOffset(FrameIdx, Offset); - Offset += MFI->getObjectSize(FrameIdx); + MFI.setObjectOffset(FrameIdx, Offset); + Offset += MFI.getObjectSize(FrameIdx); } } @@ -581,7 +587,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, /// track of them in StackBytesFree. /// static inline void -computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, +computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, int64_t FixedCSEnd, BitVector &StackBytesFree) { // Avoid undefined int64_t -> int conversion below in extreme case. @@ -592,7 +598,7 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, SmallVector<int, 16> AllocatedFrameSlots; // Add fixed objects. - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) + for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) AllocatedFrameSlots.push_back(i); // Add callee-save objects. for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) @@ -601,8 +607,8 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, for (int i : AllocatedFrameSlots) { // These are converted from int64_t, but they should always fit in int // because of the FixedCSEnd check above. - int ObjOffset = MFI->getObjectOffset(i); - int ObjSize = MFI->getObjectSize(i); + int ObjOffset = MFI.getObjectOffset(i); + int ObjSize = MFI.getObjectSize(i); int ObjStart, ObjEnd; if (StackGrowsDown) { // ObjOffset is negative when StackGrowsDown is true. @@ -621,10 +627,10 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, /// Assign frame object to an unused portion of the stack in the fixed stack /// object range. Return true if the allocation was successful. 
/// -static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, +static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, unsigned MaxAlign, BitVector &StackBytesFree) { - if (MFI->isVariableSizedObjectIndex(FrameIdx)) + if (MFI.isVariableSizedObjectIndex(FrameIdx)) return false; if (StackBytesFree.none()) { @@ -634,11 +640,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, return false; } - unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx); + unsigned ObjAlign = MFI.getObjectAlignment(FrameIdx); if (ObjAlign > MaxAlign) return false; - int64_t ObjSize = MFI->getObjectSize(FrameIdx); + int64_t ObjSize = MFI.getObjectSize(FrameIdx); int FreeStart; for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; FreeStart = StackBytesFree.find_next(FreeStart)) { @@ -668,11 +674,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, int ObjStart = -(FreeStart + ObjSize); DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart << "]\n"); - MFI->setObjectOffset(FrameIdx, ObjStart); + MFI.setObjectOffset(FrameIdx, ObjStart); } else { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart << "]\n"); - MFI->setObjectOffset(FrameIdx, FreeStart); + MFI.setObjectOffset(FrameIdx, FreeStart); } StackBytesFree.reset(FreeStart, FreeStart + ObjSize); @@ -684,7 +690,7 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo *MFI, bool StackGrowsDown, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), @@ -706,7 +712,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -725,17 +731,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // non-fixed objects can't be allocated right at the start of local area. // Adjust 'Offset' to point to the end of last fixed sized preallocated // object. - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -MFI->getObjectOffset(i); + FixedOff = -MFI.getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. - FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -746,32 +752,32 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. 
- Offset += MFI->getObjectSize(i); + Offset += MFI.getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); - MFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI.setObjectOffset(i, -Offset); // Set the computed offset } } else if (MaxCSFrameIndex >= MinCSFrameIndex) { // Be careful about underflow in comparisons agains MinCSFrameIndex. for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { - unsigned Align = MFI->getObjectAlignment(i); + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); - MFI->setObjectOffset(i, Offset); - Offset += MFI->getObjectSize(i); + MFI.setObjectOffset(i, Offset); + Offset += MFI.getObjectSize(i); } } // FixedCSEnd is the stack offset to the end of the fixed and callee-save // stack area. int64_t FixedCSEnd = Offset; - unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned MaxAlign = MFI.getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer @@ -793,8 +799,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // check for whether the frame is large enough to want to use virtual // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. - if (MFI->getUseLocalStackAllocationBlock()) { - unsigned Align = MFI->getLocalFrameMaxAlign(); + if (MFI.getUseLocalStackAllocationBlock()) { + unsigned Align = MFI.getLocalFrameMaxAlign(); // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); @@ -802,15 +808,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. - for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { - std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset << "]\n"); - MFI->setObjectOffset(Entry.first, FIOffset); + MFI.setObjectOffset(Entry.first, FIOffset); } // Allocate the local block - Offset += MFI->getLocalFrameSize(); + Offset += MFI.getLocalFrameSize(); MaxAlign = std::max(Align, MaxAlign); } @@ -823,30 +829,30 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; - if (MFI->getStackProtectorIndex() >= 0) { + if (MFI.getStackProtectorIndex() >= 0) { StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign, Skew); // Assign large stack objects first. 
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isObjectPreAllocated(i) && - MFI->getUseLocalStackAllocationBlock()) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isObjectPreAllocated(i) && + MFI.getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; - if (MFI->isDeadObjectIndex(i)) + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i || + if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) continue; - switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { case StackProtector::SSPLK_None: continue; case StackProtector::SSPLK_SmallArray: @@ -874,17 +880,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Then prepare to assign frame offsets to stack objects that are not used to // spill callee saved registers. - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isObjectPreAllocated(i) && - MFI->getUseLocalStackAllocationBlock()) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; - if (MFI->isDeadObjectIndex(i)) + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i || + if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) continue; if (ProtectedObjs.count(i)) @@ -911,7 +916,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { BitVector StackBytesFree; if (!ObjectsToAllocate.empty() && Fn.getTarget().getOptLevel() != CodeGenOpt::None && - MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) + MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, FixedCSEnd, StackBytesFree); @@ -935,8 +940,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) - Offset += MFI->getMaxCallFrameSize(); + if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI.getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -944,8 +949,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); @@ -958,7 +963,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Update frame info to pretend that this is part of the stack... 
int64_t StackSize = Offset - LocalAreaOffset; - MFI->setStackSize(StackSize); + MFI.setStackSize(StackSize); NumBytesStackSpace += StackSize; } @@ -1009,7 +1014,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); - SmallPtrSet<MachineBasicBlock*, 8> Reachable; + df_iterator_default_set<MachineBasicBlock*> Reachable; // Iterate over the reachable blocks in DFS order. for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); @@ -1047,7 +1052,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB); + if (RS && FrameIndexEliminationScavenging) + RS->enterBasicBlock(*BB); bool InsideCallSequence = false; @@ -1116,7 +1122,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? nullptr : RS); + FrameIndexEliminationScavenging ? RS : nullptr); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -1132,7 +1138,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // the SP adjustment made by each instruction in the sequence. // This includes both the frame setup/destroy pseudos (handled above), // as well as other instructions that have side effects w.r.t the SP. - // Note that this must come after eliminateFrameIndex, because + // Note that this must come after eliminateFrameIndex, because // if I itself referred to a frame index, we shouldn't count its own // adjustment. if (DidFinishLoop && InsideCallSequence) @@ -1141,7 +1147,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (DoIncr && I != BB->end()) ++I; // Update register states. - if (RS && !FrameIndexVirtualScavenging && DidFinishLoop) + if (RS && FrameIndexEliminationScavenging && DidFinishLoop) RS->forward(MI); } } diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 93eeb9c..fb49a93 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -41,7 +41,8 @@ static cl::opt<bool, true> VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), cl::desc("Verify during register allocation")); -const char RegAllocBase::TimerGroupName[] = "Register Allocation"; +const char RegAllocBase::TimerGroupName[] = "regalloc"; +const char RegAllocBase::TimerGroupDescription[] = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; //===----------------------------------------------------------------------===// @@ -67,7 +68,8 @@ void RegAllocBase::init(VirtRegMap &vrm, // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. 
void RegAllocBase::seedLiveRegs() { - NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) @@ -143,6 +145,7 @@ void RegAllocBase::allocatePhysRegs() { continue; } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(!SplitVirtReg->empty() && "expecting non-empty interval"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index 296ffe8..d8921b5 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -105,6 +105,7 @@ protected: // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; + static const char TimerGroupDescription[]; /// Method called when the allocator is about to remove a LiveInterval. virtual void aboutToRemoveInterval(LiveInterval &LI) {} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 11dfda6..a558e37 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -76,9 +76,7 @@ public: RABasic(); /// Return the pass name. - const char* getPassName() const override { - return "Basic Register Allocator"; - } + StringRef getPassName() const override { return "Basic Register Allocator"; } /// RABasic analysis usage. void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -105,6 +103,11 @@ public: /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 55fb33e..fd759bc 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -149,18 +149,21 @@ namespace { spillImpossible = ~0u }; public: - const char *getPassName() const override { - return "Fast Register Allocator"; - } + StringRef getPassName() const override { return "Fast Register Allocator"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + MachineFunctionProperties getSetProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: @@ -209,8 +212,8 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... 
- int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; @@ -360,7 +363,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { break; case regReserved: PhysRegState[PhysReg] = regFree; - // Fall through + LLVM_FALLTHROUGH; case regFree: MO.setIsKill(); return; @@ -389,7 +392,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { assert((TRI->isSuperRegister(PhysReg, Alias) || TRI->isSuperRegister(Alias, PhysReg)) && "Instruction is not using a subregister of a reserved register"); - // Fall through. + LLVM_FALLTHROUGH; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. @@ -421,7 +424,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, break; default: spillVirtReg(MI, VirtReg); - // Fall through. + LLVM_FALLTHROUGH; case regFree: case regReserved: PhysRegState[PhysReg] = NewState; @@ -437,7 +440,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, break; default: spillVirtReg(MI, VirtReg); - // Fall through. + LLVM_FALLTHROUGH; case regFree: case regReserved: PhysRegState[Alias] = regDisabled; @@ -1093,8 +1096,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { UsedInInstr.clear(); UsedInInstr.setUniverse(TRI->getNumRegUnits()); - assert(!MRI->isSSA() && "regalloc requires leaving SSA"); - // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index c4d4b1e..c47cfb1 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -61,8 +61,7 @@ static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode( cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), - clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), - clEnumValEnd), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed)); static cl::opt<unsigned> @@ -318,9 +317,7 @@ public: RAGreedy(); /// Return the pass name. - const char* getPassName() const override { - return "Greedy Register Allocator"; - } + StringRef getPassName() const override { return "Greedy Register Allocator"; } /// RAGreedy analysis usage. void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -334,6 +331,11 @@ public: /// Perform register allocation. 
bool runOnMachineFunction(MachineFunction &mf) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + static char ID; private: @@ -421,6 +423,24 @@ private: } // end anonymous namespace char RAGreedy::ID = 0; +char &llvm::RAGreedyID = RAGreedy::ID; + +INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", + "Greedy Register Allocator", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(MachineScheduler) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_DEPENDENCY(EdgeBundles) +INITIALIZE_PASS_DEPENDENCY(SpillPlacement) +INITIALIZE_PASS_END(RAGreedy, "greedy", + "Greedy Register Allocator", false, false) #ifndef NDEBUG const char *const RAGreedy::StageName[] = { @@ -444,19 +464,6 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { } RAGreedy::RAGreedy(): MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); - initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); - initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); - initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); } void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { @@ -639,6 +646,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, evictInterference(VirtReg, Hint, NewVRegs); return Hint; } + // Record the missed hint, we may be able to recover + // at the end if the surrounding allocation changed. + SetOfBrokenHints.insert(&VirtReg); } // Try to evict interference from a cheaper alternative. @@ -859,7 +869,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs, unsigned CostPerUseLimit) { - NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); // Keep track of the cheapest interference seen so far. EvictionCost BestCost; @@ -1957,7 +1968,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Local intervals are handled separately. 
if (LIS->intervalIsInOneMBB(VirtReg)) { - NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); SA->analyze(&VirtReg); unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) @@ -1965,7 +1977,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, return tryInstructionSplit(VirtReg, Order, NewVRegs); } - NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); SA->analyze(&VirtReg); @@ -2103,6 +2116,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". FixedRegisters.insert(VirtReg.reg); + SmallVector<unsigned, 4> CurrentNewVRegs; Order.rewind(); while (unsigned PhysReg = Order.next()) { @@ -2110,6 +2124,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, << PrintReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); VirtRegToPhysReg.clear(); + CurrentNewVRegs.clear(); // It is only possible to recolor virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > @@ -2154,8 +2169,11 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // If we cannot recolor all the interferences, we will have to start again // at this point for the next physical register. SmallVirtRegSet SaveFixedRegisters(FixedRegisters); - if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, - Depth)) { + if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, + FixedRegisters, Depth)) { + // Push the queued vregs into the main queue. + for (unsigned NewVReg : CurrentNewVRegs) + NewVRegs.push_back(NewVReg); // Do not mess up with the global assignment process. // I.e., VirtReg must be unassigned. Matrix->unassign(VirtReg); @@ -2169,6 +2187,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, FixedRegisters = SaveFixedRegisters; Matrix->unassign(VirtReg); + // For a newly created vreg which is also in RecoloringCandidates, + // don't add it to NewVRegs because its physical register will be restored + // below. Other vregs in CurrentNewVRegs are created by calling + // selectOrSplit and should be added into NewVRegs. + for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(), + End = CurrentNewVRegs.end(); + Next != End; ++Next) { + if (RecoloringCandidates.count(&LIS->getInterval(*Next))) + continue; + NewVRegs.push_back(*Next); + } + for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { @@ -2201,10 +2231,21 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); unsigned PhysReg; PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); - if (PhysReg == ~0u || !PhysReg) + // When splitting happens, the live-range may actually be empty. + // In that case, this is okay to continue the recoloring even + // if we did not find an alternative color for it. Indeed, + // there will not be anything to color for LI in the end. 
+ if (PhysReg == ~0u || (!PhysReg && !LI->empty())) return false; + + if (!PhysReg) { + assert(LI->empty() && "Only empty live-range do not require a register"); + DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n"); + continue; + } DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + Matrix->assign(*LI, PhysReg); FixedRegisters.insert(LI->reg); } @@ -2519,7 +2560,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; } - assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs"); // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. @@ -2531,17 +2572,20 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return 0; } + if (Stage < RS_Spill) { + // Try splitting VirtReg or interferences. + unsigned NewVRegSizeBefore = NewVRegs.size(); + unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) + return PhysReg; + } + // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, Depth); - // Try splitting VirtReg or interferences. - unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) - return PhysReg; - // Finally spill VirtReg itself. if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { // TODO: This is experimental and in particular, we do not model @@ -2552,7 +2596,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, DEBUG(dbgs() << "Do as if this register is in memory\n"); NewVRegs.push_back(VirtReg.reg); } else { - NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("spill", "Spiller", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index d1221ec..101b30b 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -99,9 +99,7 @@ public: } /// Return the pass name. - const char* getPassName() const override { - return "PBQP Register Allocator"; - } + StringRef getPassName() const override { return "PBQP Register Allocator"; } /// PBQP analysis usage. 
void getAnalysisUsage(AnalysisUsage &au) const override; @@ -109,6 +107,11 @@ public: /// Perform register allocation bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + private: typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 50b8854..ece44c2 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -48,7 +48,7 @@ public: initializeRegUsageInfoCollectorPass(Registry); } - const char *getPassName() const override { + StringRef getPassName() const override { return "Register Usage Information Collector Pass"; } @@ -57,10 +57,6 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; static char ID; - -private: - void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask, - unsigned PReg); }; } // end of anonymous namespace @@ -76,13 +72,6 @@ FunctionPass *llvm::createRegUsageInfoCollector() { return new RegUsageInfoCollector(); } -void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI, - uint32_t *RegMask, unsigned PReg) { - // If PReg is clobbered then all of its alias are also clobbered. - for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) - RegMask[*AI / 32] &= ~(1u << *AI % 32); -} - void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<PhysicalRegisterUsageInfo>(); AU.setPreservesAll(); @@ -116,7 +105,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) if (MRI->isPhysRegModified(PReg, true)) - markRegClobbered(TRI, &RegMask[0], PReg); + RegMask[PReg / 32] &= ~(1u << PReg % 32); if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { const uint32_t *CallPreservedMask = diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 7595661..5cc35bf 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -52,7 +52,7 @@ public: initializeRegUsageInfoPropagationPassPass(Registry); } - const char *getPassName() const override { return RUIP_NAME; } + StringRef getPassName() const override { return RUIP_NAME; } bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index 617ece9..4bb3c22 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -815,14 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, for (LiveInterval::SubRange &SB : IntB.subranges()) { LaneBitmask BMask = SB.LaneMask; LaneBitmask Common = BMask & AMask; - if (Common == 0) + if (Common.none()) continue; DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) << " into " << PrintLaneMask(Common) << '\n'); LaneBitmask BRest = BMask & ~AMask; LiveInterval::SubRange *CommonRange; - if (BRest != 0) { + if (BRest.any()) { SB.LaneMask = BRest; DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) << '\n'); @@ -841,7 +841,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, addSegmentsWithValNo(*CommonRange, BSubValNo, SA, 
ASubValNo); AMask &= ~BMask; } - if (AMask != 0) { + if (AMask.any()) { DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); @@ -975,6 +975,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewRC = CommonRC; DstIdx = 0; DefMO.setSubReg(0); + DefMO.setIsUndef(false); // Only subregs can have def+undef. } } } @@ -1060,7 +1061,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR.createDeadDef(DefIndex, Alloc); MaxMask &= ~SR.LaneMask; } - if (MaxMask != 0) { + if (MaxMask.any()) { LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask); SR->createDeadDef(DefIndex, Alloc); } @@ -1153,7 +1154,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { - if ((SR.LaneMask & SrcMask) == 0) + if ((SR.LaneMask & SrcMask).none()) continue; if (SR.liveAt(Idx)) return false; @@ -1174,7 +1175,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // The affected subregister segments can be removed. LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); for (LiveInterval::SubRange &SR : DstLI.subranges()) { - if ((SR.LaneMask & DstMask) == 0) + if ((SR.LaneMask & DstMask).none()) continue; VNInfo *SVNI = SR.getVNInfoAt(RegIndex); @@ -1193,10 +1194,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { SlotIndex UseIdx = LIS->getInstructionIndex(MI); LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; - if (UseMask != ~0u && DstLI.hasSubRanges()) { + if (!UseMask.all() && DstLI.hasSubRanges()) { isLive = false; for (const LiveInterval::SubRange &SR : DstLI.subranges()) { - if ((SR.LaneMask & UseMask) == 0) + if ((SR.LaneMask & UseMask).none()) continue; if (SR.liveAt(UseIdx)) { isLive = true; @@ -1210,6 +1211,17 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { MO.setIsUndef(true); DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); } + + // A def of a subregister may be a use of the other subregisters, so + // deleting a def of a subregister may also remove uses. Since CopyMI + // is still part of the function (but about to be erased), mark all + // defs of DstReg in it as <undef>, so that shrinkToUses would + // ignore them. + for (MachineOperand &MO : CopyMI->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) + MO.setIsUndef(true); + LIS->shrinkToUses(&DstLI); + return true; } @@ -1220,7 +1232,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, Mask = ~Mask; bool IsUndef = true; for (const LiveInterval::SubRange &S : Int.subranges()) { - if ((S.LaneMask & Mask) == 0) + if ((S.LaneMask & Mask).none()) continue; if (S.liveAt(UseIdx)) { IsUndef = false; @@ -1446,7 +1458,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } - ShrinkMask = 0; + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; // Okay, attempt to join these two intervals. On failure, this returns false. @@ -1504,10 +1516,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // Shrink subregister ranges if necessary. 
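Much of the RegisterCoalescer churn in these hunks is the mechanical move from plain unsigned lane masks, compared with == 0 / != 0 / == ~0u, to a LaneBitmask type queried through none()/any()/all(). The stand-in below is written only from the usage visible here, not copied from the real llvm/MC/LaneBitmask.h, to show why each comparison had to be rewritten once the mask stopped converting to an integer:

    #include <cstdint>
    #include <iostream>

    // Simplified stand-in for llvm::LaneBitmask; the real type has more API.
    struct LaneBitmask {
      using Type = uint32_t;
      constexpr explicit LaneBitmask(Type V = 0) : Mask(V) {}
      constexpr bool none() const { return Mask == 0; }   // replaces: M == 0
      constexpr bool any()  const { return Mask != 0; }   // replaces: M != 0
      constexpr bool all()  const { return ~Mask == 0; }  // replaces: M == ~0u
      constexpr LaneBitmask operator&(LaneBitmask O) const {
        return LaneBitmask(Mask & O.Mask);
      }
      constexpr LaneBitmask operator~() const { return LaneBitmask(~Mask); }
      static constexpr LaneBitmask getNone() { return LaneBitmask(0); }
      static constexpr LaneBitmask getAll()  { return LaneBitmask(~Type(0)); }
      Type Mask;
    };

    int main() {
      LaneBitmask SubRangeMask(0x3), ShrinkMask(0x4);
      // Post-change idiom: overlap tests go through none()/any(), with no
      // implicit conversion of the mask back to an integer.
      if ((SubRangeMask & ShrinkMask).none())
        std::cout << "lanes do not overlap\n";
      return 0;
    }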
- if (ShrinkMask != 0) { + if (ShrinkMask.any()) { LiveInterval &LI = LIS->getInterval(CP.getDstReg()); for (LiveInterval::SubRange &S : LI.subranges()) { - if ((S.LaneMask & ShrinkMask) == 0) + if ((S.LaneMask & ShrinkMask).none()) continue; DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) << ")\n"); @@ -1544,9 +1556,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { unsigned DstReg = CP.getDstReg(); + unsigned SrcReg = CP.getSrcReg(); assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(DstReg) && "Not a reserved register"); - LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &RHS = LIS->getInterval(SrcReg); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1558,11 +1571,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // Deny any overlapping intervals. This depends on all the reserved // register live ranges to look like dead defs. - for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) - if (RHS.overlaps(LIS->getRegUnit(*UI))) { - DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); - return false; + if (!MRI->isConstantPhysReg(DstReg)) { + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + // Abort if not all the regunits are reserved. + for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { + if (!MRI->isReserved(*RI)) + return false; + } + if (RHS.overlaps(LIS->getRegUnit(*UI))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); + return false; + } } + } // Skip any value computations, we are not adding new values to the // reserved register. Also skip merging the live ranges, the reserved @@ -1572,43 +1593,64 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // Delete the identity copy. MachineInstr *CopyMI; if (CP.isFlipped()) { - CopyMI = MRI->getVRegDef(RHS.reg); + // Physreg is copied into vreg + // %vregY = COPY %X + // ... //< no other def of %X here + // use %vregY + // => + // ... + // use %X + CopyMI = MRI->getVRegDef(SrcReg); } else { - if (!MRI->hasOneNonDBGUse(RHS.reg)) { + // VReg is copied into physreg: + // %vregX = def + // ... //< no other def or use of %Y here + // %Y = COPY %vregX + // => + // %Y = def + // ... + if (!MRI->hasOneNonDBGUse(SrcReg)) { DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); return false; } - MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); - CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); - const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); - const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot(); - - // We checked above that there are no interfering defs of the physical - // register. However, for this case, where we intent to move up the def of - // the physical register, we also need to check for interfering uses. - SlotIndexes *Indexes = LIS->getSlotIndexes(); - for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); - SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { - MachineInstr *MI = LIS->getInstructionFromIndex(SI); - if (MI->readsRegister(DstReg, TRI)) { - DEBUG(dbgs() << "\t\tInterference (read): " << *MI); - return false; - } + if (!LIS->intervalIsInOneMBB(RHS)) { + DEBUG(dbgs() << "\t\tComplex control flow!\n"); + return false; + } - // We must also check for clobbers caused by regmasks. 
- for (const auto &MO : MI->operands()) { - if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) { - DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI); + MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); + CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); + SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); + SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + if (!MRI->isConstantPhysReg(DstReg)) { + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intent to move up the def of + // the physical register, we also need to check for interfering uses. + SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); return false; } + + // We must also check for clobbers caused by regmasks. + for (const auto &MO : MI->operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) { + DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI); + return false; + } + } } } // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. - DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " - << CopyRegIdx << "\n"); + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) + << " at " << CopyRegIdx << "\n"); LIS->removePhysRegDefAt(DstReg, CopyRegIdx); // Create a new dead def at the new def location. @@ -1795,11 +1837,11 @@ class JoinVals { /// True once Pruned above has been computed. bool PrunedComputed; - Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), + Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(), RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false), Pruned(false), PrunedComputed(false) {} - bool isAnalyzed() const { return WriteLanes != 0; } + bool isAnalyzed() const { return WriteLanes.any(); } }; /// One entry per value number in LI. @@ -1889,12 +1931,22 @@ public: /// no useful information and can be removed. void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); + /// Pruning values in subranges can lead to removing segments in these + /// subranges started by IMPLICIT_DEFs. The corresponding segments in + /// the main range also need to be removed. This function will mark + /// the corresponding values in the main range as pruned, so that + /// eraseInstrs can do the final cleanup. + /// The parameter @p LI must be the interval whose main range is the + /// live range LR. + void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange); + /// Erase any machine instructions that have been coalesced away. /// Add erased instructions to ErasedInstrs. /// Add foreign virtual registers to ShrinkRegs if their live range ended at /// the erased instrs. void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, - SmallVectorImpl<unsigned> &ShrinkRegs); + SmallVectorImpl<unsigned> &ShrinkRegs, + LiveInterval *LI = nullptr); /// Remove liverange defs at places where implicit defs will be removed. 
void removeImplicitDefs(); @@ -1906,7 +1958,7 @@ public: LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const { - LaneBitmask L = 0; + LaneBitmask L; for (const MachineOperand &MO : DefMI->operands()) { if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) continue; @@ -1944,7 +1996,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( for (const LiveInterval::SubRange &S : LI.subranges()) { // Transform lanemask to a mask in the joined live interval. LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); - if ((SMask & LaneMask) == 0) + if ((SMask & LaneMask).none()) continue; LiveQueryResult LRQ = S.Query(Def); ValueIn = LRQ.valueIn(); @@ -1984,7 +2036,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { assert(!V.isAnalyzed() && "Value has already been analyzed!"); VNInfo *VNI = LR.getValNumInfo(ValNo); if (VNI->isUnused()) { - V.WriteLanes = ~0u; + V.WriteLanes = LaneBitmask::getAll(); return CR_Keep; } @@ -1992,16 +2044,17 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1) + : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); assert(DefMI != nullptr); if (SubRangeJoin) { // We don't care about the lanes when joining subregister ranges. - V.WriteLanes = V.ValidLanes = 1; + V.WriteLanes = V.ValidLanes = LaneBitmask(1); if (DefMI->isImplicitDef()) { - V.ValidLanes = 0; + V.ValidLanes = LaneBitmask::getNone(); V.ErasableImplicitDef = true; } } else { @@ -2074,7 +2127,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // predecessor, the PHI itself can't introduce any conflicts. if (VNI->isPHIDef()) return CR_Merge; - if (V.ValidLanes & OtherV.ValidLanes) + if ((V.ValidLanes & OtherV.ValidLanes).any()) // Overlapping lanes can't be resolved. return CR_Impossible; else @@ -2119,7 +2172,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // We need the def for the subregister if there is nothing else live at the // subrange at this point. if (TrackSubRegLiveness - && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0) + && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none()) return CR_Replace; return CR_Erase; } @@ -2159,7 +2212,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace // handles this complex value mapping. - if ((V.WriteLanes & OtherV.ValidLanes) == 0) + if ((V.WriteLanes & OtherV.ValidLanes).none()) return CR_Replace; // If the other live range is killed by DefMI and the live ranges are still @@ -2180,7 +2233,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // possibility that no instructions actually read the clobbered lanes. // If we're clobbering all the lanes in OtherVNI, at least one must be read. // Otherwise Other.RI wouldn't be live here. - if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none()) return CR_Impossible; // We need to verify that no instructions are reading the clobbered lanes. 
To @@ -2228,11 +2281,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { Val &OtherV = Other.Vals[V.OtherVNI->id]; // We cannot erase an IMPLICIT_DEF if we don't have valid values for all // its lanes. - if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness) + if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness) OtherV.ErasableImplicitDef = false; OtherV.Pruned = true; + LLVM_FALLTHROUGH; } - // Fall through. default: // This value number needs to go in the final joined live range. Assignments[ValNo] = NewVNInfo.size(); @@ -2289,7 +2342,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, TaintedLanes &= ~OV.WriteLanes; if (!OV.RedefVNI) break; - } while (TaintedLanes); + } while (TaintedLanes.any()); return true; } @@ -2302,8 +2355,8 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, continue; if (!MO.readsReg()) continue; - if (Lanes & TRI->getSubRegIndexLaneMask( - TRI->composeSubRegIndices(SubIdx, MO.getSubReg()))) + unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); + if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any()) return true; } return false; @@ -2350,7 +2403,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { Indexes->getInstructionFromIndex(TaintExtent.front().first); assert(LastMI && "Range must end at a proper instruction"); unsigned TaintNum = 0; - for(;;) { + for (;;) { assert(MI != MBB->end() && "Bad LastMI"); if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); @@ -2415,7 +2468,8 @@ void JoinVals::pruneValues(JoinVals &Other, for (MachineOperand &MO : Indexes->getInstructionFromIndex(Def)->operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { - MO.setIsUndef(EraseImpDef); + if (MO.getSubReg() != 0) + MO.setIsUndef(EraseImpDef); MO.setIsDead(false); } } @@ -2448,8 +2502,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) -{ +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { @@ -2486,6 +2539,30 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) LI.removeEmptySubRanges(); } +/// Check if any of the subranges of @p LI contain a definition at @p Def. 
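One small change in the computeAssignment hunk above: the bare "// Fall through." comment becomes the LLVM_FALLTHROUGH macro from llvm/Support/Compiler.h, which expands to an explicit fall-through attribute when the host compiler offers one and to nothing otherwise. A toy switch (invented for illustration) showing the idiom:

    #include "llvm/Support/Compiler.h"

    // Invented classifier: Kind 0 does extra bookkeeping, then deliberately
    // falls into the Kind 1 handling.
    int classify(int Kind, int &Bookkeeping) {
      switch (Kind) {
      case 0:
        ++Bookkeeping;
        LLVM_FALLTHROUGH;   // silences implicit-fallthrough warnings
      case 1:
        return Bookkeeping;
      default:
        return -1;
      }
    }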
+static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) { + for (LiveInterval::SubRange &SR : LI.subranges()) { + if (VNInfo *VNI = SR.Query(Def).valueOutOrDead()) + if (VNI->def == Def) + return true; + } + return false; +} + +void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) { + assert(&static_cast<LiveRange&>(LI) == &LR); + + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + if (Vals[i].Resolution != CR_Keep) + continue; + VNInfo *VNI = LR.getValNumInfo(i); + if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def)) + continue; + Vals[i].Pruned = true; + ShrinkMainRange = true; + } +} + void JoinVals::removeImplicitDefs() { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { Val &V = Vals[i]; @@ -2499,7 +2576,8 @@ void JoinVals::removeImplicitDefs() { } void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, - SmallVectorImpl<unsigned> &ShrinkRegs) { + SmallVectorImpl<unsigned> &ShrinkRegs, + LiveInterval *LI) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. SlotIndex Def = LR.getValNumInfo(i)->def; @@ -2511,13 +2589,65 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; // Remove value number i from LR. + // For intervals with subranges, removing a segment from the main range + // may require extending the previous segment: for each definition of + // a subregister, there will be a corresponding def in the main range. + // That def may fall in the middle of a segment from another subrange. + // In such cases, removing this def from the main range must be + // complemented by extending the main range to account for the liveness + // of the other subrange. VNInfo *VNI = LR.getValNumInfo(i); + SlotIndex Def = VNI->def; + // The new end point of the main range segment to be extended. + SlotIndex NewEnd; + if (LI != nullptr) { + LiveRange::iterator I = LR.FindSegmentContaining(Def); + assert(I != LR.end()); + // Do not extend beyond the end of the segment being removed. + // The segment may have been pruned in preparation for joining + // live ranges. + NewEnd = I->end; + } + LR.removeValNo(VNI); // Note that this VNInfo is reused and still referenced in NewVNInfo, // make it appear like an unused value number. VNI->markUnused(); - DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); - // FALL THROUGH. + + if (LI != nullptr && LI->hasSubRanges()) { + assert(static_cast<LiveRange*>(LI) == &LR); + // Determine the end point based on the subrange information: + // minimum of (earliest def of next segment, + // latest end point of containing segment) + SlotIndex ED, LE; + for (LiveInterval::SubRange &SR : LI->subranges()) { + LiveRange::iterator I = SR.find(Def); + if (I == SR.end()) + continue; + if (I->start > Def) + ED = ED.isValid() ? std::min(ED, I->start) : I->start; + else + LE = LE.isValid() ? std::max(LE, I->end) : I->end; + } + if (LE.isValid()) + NewEnd = std::min(NewEnd, LE); + if (ED.isValid()) + NewEnd = std::min(NewEnd, ED); + + // We only want to do the extension if there was a subrange that + // was live across Def. 
+ if (LE.isValid()) { + LiveRange::iterator S = LR.find(Def); + if (S != LR.begin()) + std::prev(S)->end = NewEnd; + } + } + DEBUG({ + dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'; + if (LI != nullptr) + dbgs() << "\t\t LHS = " << *LI << '\n'; + }); + LLVM_FALLTHROUGH; } case CR_Erase: { @@ -2591,8 +2721,15 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. - DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() - << " points: " << LRange << '\n'); + DEBUG({ + dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; + for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { + dbgs() << EndPoints[i]; + if (i != n-1) + dbgs() << ','; + } + dbgs() << ": " << LRange << '\n'; + }); LIS->extendToIndices(LRange, EndPoints); } @@ -2606,7 +2743,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, // LaneMask of subregisters common to subrange R and ToMerge. LaneBitmask Common = RMask & LaneMask; // There is nothing to do without common subregs. - if (Common == 0) + if (Common.none()) continue; DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " @@ -2615,7 +2752,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, // they have to split into their own subrange. LaneBitmask LRest = RMask & ~LaneMask; LiveInterval::SubRange *CommonRange; - if (LRest != 0) { + if (LRest.any()) { R.LaneMask = LRest; DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); // Duplicate SubRange for newly merged common stuff. @@ -2630,7 +2767,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, LaneMask &= ~RMask; } - if (LaneMask != 0) { + if (LaneMask.any()) { DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } @@ -2641,10 +2778,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); - JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, - TRI, false, TrackSubRegLiveness); - JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, - TRI, false, TrackSubRegLiveness); + JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(), + NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); + JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(), + NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS @@ -2670,7 +2807,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(DstIdx); // LHS must support subregs or we wouldn't be in this codepath. - assert(Mask != 0); + assert(Mask.any()); LHS.createSubRangeFrom(Allocator, Mask, LHS); } else if (DstIdx != 0) { // Transform LHS lanemasks to new register class if necessary. @@ -2697,6 +2834,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { } DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + // Pruning implicit defs from subranges may result in the main range + // having stale segments. 
+ LHSVals.pruneMainSegments(LHS, ShrinkMainRange); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); RHSVals.pruneSubRegValues(LHS, ShrinkMask); } @@ -2712,7 +2853,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Erase COPY and IMPLICIT_DEF instructions. This may cause some external // registers to require trimming. SmallVector<unsigned, 8> ShrinkRegs; - LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS); RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); while (!ShrinkRegs.empty()) shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); @@ -2729,8 +2870,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { if (!EndPoints.empty()) { // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. - DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() - << " points: " << LHS << '\n'); + DEBUG({ + dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; + for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { + dbgs() << EndPoints[i]; + if (i != n-1) + dbgs() << ','; + } + dbgs() << ": " << LHS << '\n'; + }); LIS->extendToIndices((LiveRange&)LHS, EndPoints); } @@ -3039,7 +3187,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // If subranges are still supported, then the same subregs // should still be supported. for (LiveInterval::SubRange &S : LI.subranges()) { - assert((S.LaneMask & ~MaxMask) == 0); + assert((S.LaneMask & ~MaxMask).none()); } #endif } diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index a21d6c1..fc84aeb 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -26,8 +26,8 @@ using namespace llvm; static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, const MachineRegisterInfo &MRI, unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { - assert((PrevMask & ~NewMask) == 0 && "Must not remove bits"); - if (PrevMask != 0 || NewMask == 0) + assert((PrevMask & ~NewMask).none() && "Must not remove bits"); + if (PrevMask.any() || NewMask.none()) return; PSetIterator PSetI = MRI.getPressureSets(Reg); @@ -40,8 +40,8 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, const MachineRegisterInfo &MRI, unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { - assert((NewMask & !PrevMask) == 0 && "Must not add bits"); - if (NewMask != 0 || PrevMask == 0) + //assert((NewMask & !PrevMask) == 0 && "Must not add bits"); + if (NewMask.any() || PrevMask.none()) return; PSetIterator PSetI = MRI.getPressureSets(Reg); @@ -73,7 +73,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Live In: "; for (const RegisterMaskPair &P : LiveInRegs) { dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); - if (P.LaneMask != ~0u) + if (!P.LaneMask.all()) dbgs() << ':' << PrintLaneMask(P.LaneMask); dbgs() << ' '; } @@ -81,7 +81,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Live Out: "; for (const RegisterMaskPair &P : LiveOutRegs) { dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); - if (P.LaneMask != ~0u) + if (!P.LaneMask.all()) dbgs() << ':' << PrintLaneMask(P.LaneMask); dbgs() << ' '; } @@ -112,7 +112,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { void RegPressureTracker::increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, LaneBitmask 
NewMask) { - if (PreviousMask != 0 || NewMask == 0) + if (PreviousMask.any() || NewMask.none()) return; PSetIterator PSetI = MRI->getPressureSets(RegUnit); @@ -266,9 +266,8 @@ bool RegPressureTracker::isBottomClosed() const { SlotIndex RegPressureTracker::getCurrSlot() const { - MachineBasicBlock::const_iterator IdxPos = CurrPos; - while (IdxPos != MBB->end() && IdxPos->isDebugValue()) - ++IdxPos; + MachineBasicBlock::const_iterator IdxPos = + skipDebugInstructionsForward(CurrPos, MBB->end()); if (IdxPos == MBB->end()) return LIS->getMBBEndIdx(MBB); return LIS->getInstructionIndex(*IdxPos).getRegSlot(); @@ -322,29 +321,28 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { unsigned RegUnit = Pair.RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && !RPTracker.hasUntiedDef(RegUnit)) - increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask); + increaseSetPressure(LiveThruPressure, *MRI, RegUnit, + LaneBitmask::getNone(), Pair.LaneMask); } } static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits, unsigned RegUnit) { - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I == RegUnits.end()) - return 0; + return LaneBitmask::getNone(); return I->LaneMask; } static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, RegisterMaskPair Pair) { unsigned RegUnit = Pair.RegUnit; - assert(Pair.LaneMask != 0); - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + assert(Pair.LaneMask.any()); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I == RegUnits.end()) { RegUnits.push_back(Pair); } else { @@ -354,28 +352,26 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits, unsigned RegUnit) { - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I == RegUnits.end()) { - RegUnits.push_back(RegisterMaskPair(RegUnit, 0)); + RegUnits.push_back(RegisterMaskPair(RegUnit, LaneBitmask::getNone())); } else { - I->LaneMask = 0; + I->LaneMask = LaneBitmask::getNone(); } } static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, RegisterMaskPair Pair) { unsigned RegUnit = Pair.RegUnit; - assert(Pair.LaneMask != 0); - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + assert(Pair.LaneMask.any()); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I != RegUnits.end()) { I->LaneMask &= ~Pair.LaneMask; - if (I->LaneMask == 0) + if (I->LaneMask.none()) RegUnits.erase(I); } } @@ -386,14 +382,15 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { const LiveInterval &LI = LIS.getInterval(RegUnit); - LaneBitmask Result = 0; + LaneBitmask Result; if (TrackLaneMasks && LI.hasSubRanges()) { for (const LiveInterval::SubRange 
&SR : LI.subranges()) { if (Property(SR, Pos)) Result |= SR.LaneMask; } } else if (Property(LI, Pos)) { - Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u; + Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) + : LaneBitmask::getAll(); } return Result; @@ -403,7 +400,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, // for physical registers on targets with many registers (GPUs). if (LR == nullptr) return SafeDefault; - return Property(*LR, Pos) ? ~0u : 0; + return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone(); } } @@ -411,7 +408,8 @@ static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, SlotIndex Pos) { - return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u, + return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, + LaneBitmask::getAll(), [](const LiveRange &LR, SlotIndex Pos) { return LR.liveAt(Pos); }); @@ -478,10 +476,10 @@ class RegisterOperandsCollector { void pushReg(unsigned Reg, SmallVectorImpl<RegisterMaskPair> &RegUnits) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u)); + addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll())); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); + addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll())); } } @@ -516,7 +514,7 @@ class RegisterOperandsCollector { addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask)); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); + addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll())); } } @@ -567,11 +565,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, // of a subregister def we need a read-undef flag. 
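The RegisterPressure.cpp hunks just above and below also swap std::find_if(R.begin(), R.end(), Pred) for the range form find_if(R, Pred) from llvm/ADT/STLExtras.h. A small sketch of that helper on the same kind of (register unit, lane mask) list; RegisterMaskPair here is a local stand-in rather than the real CodeGen struct:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include <cstdint>

    struct RegisterMaskPair {   // stand-in for the struct used by the tracker
      unsigned RegUnit;
      uint32_t LaneMask;
    };

    uint32_t getLanes(llvm::ArrayRef<RegisterMaskPair> RegUnits,
                      unsigned RegUnit) {
      // Range-based find_if: the container is passed once, no begin()/end()
      // boilerplate at the call site.
      auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair &P) {
        return P.RegUnit == RegUnit;
      });
      return I == RegUnits.end() ? 0 : I->LaneMask;
    }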
unsigned RegUnit = I->RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && - AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0) + AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); LaneBitmask ActualDef = I->LaneMask & LiveAfter; - if (ActualDef == 0) { + if (ActualDef.none()) { I = Defs.erase(I); } else { I->LaneMask = ActualDef; @@ -582,7 +580,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getBaseIndex()); LaneBitmask LaneMask = I->LaneMask & LiveBefore; - if (LaneMask == 0) { + if (LaneMask.none()) { I = Uses.erase(I); } else { I->LaneMask = LaneMask; @@ -596,7 +594,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getDeadSlot()); - if (LiveAfter == 0) + if (LiveAfter.none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); } } @@ -673,17 +671,16 @@ void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) { void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair, SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) { - assert(Pair.LaneMask != 0); + assert(Pair.LaneMask.any()); unsigned RegUnit = Pair.RegUnit; - auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(), - [RegUnit](const RegisterMaskPair &Other) { - return Other.RegUnit == RegUnit; - }); + auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) { + return Other.RegUnit == RegUnit; + }); LaneBitmask PrevMask; LaneBitmask NewMask; if (I == LiveInOrOut.end()) { - PrevMask = 0; + PrevMask = LaneBitmask::getNone(); NewMask = Pair.LaneMask; LiveInOrOut.push_back(Pair); } else { @@ -738,14 +735,15 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, LaneBitmask NewMask = PreviousMask & ~Def.LaneMask; LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask; - if (LiveOut != 0) { + if (LiveOut.any()) { discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); // Retroactively model effects on pressure of the live out lanes. - increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut); + increaseSetPressure(CurrSetPressure, *MRI, Reg, LaneBitmask::getNone(), + LiveOut); PreviousMask = LiveOut; } - if (NewMask == 0) { + if (NewMask.none()) { // Add a 0 entry to LiveUses as a marker that the complete vreg has become // dead. if (TrackLaneMasks && LiveUses != nullptr) @@ -762,26 +760,25 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Generate liveness for uses. for (const RegisterMaskPair &Use : RegOpers.Uses) { unsigned Reg = Use.RegUnit; - assert(Use.LaneMask != 0); + assert(Use.LaneMask.any()); LaneBitmask PreviousMask = LiveRegs.insert(Use); LaneBitmask NewMask = PreviousMask | Use.LaneMask; if (NewMask == PreviousMask) continue; // Did the register just become live? - if (PreviousMask == 0) { + if (PreviousMask.none()) { if (LiveUses != nullptr) { if (!TrackLaneMasks) { addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); } else { - auto I = std::find_if(LiveUses->begin(), LiveUses->end(), - [Reg](const RegisterMaskPair Other) { - return Other.RegUnit == Reg; - }); + auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) { + return Other.RegUnit == Reg; + }); bool IsRedef = I != LiveUses->end(); if (IsRedef) { // ignore re-defs here... 
- assert(I->LaneMask == 0); + assert(I->LaneMask.none()); removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); } else { addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); @@ -792,7 +789,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Discover live outs if this may be the first occurance of this register. if (RequireIntervals) { LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx); - if (LiveOut != 0) + if (LiveOut.any()) discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); } } @@ -803,7 +800,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, for (const RegisterMaskPair &Def : RegOpers.Defs) { unsigned RegUnit = Def.RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && - (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0) + (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); } } @@ -819,9 +816,7 @@ void RegPressureTracker::recedeSkipDebugValues() { static_cast<RegionPressure&>(P).openTop(CurrPos); // Find the previous instruction. - do - --CurrPos; - while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin()); SlotIndex SlotIdx; if (RequireIntervals) @@ -871,7 +866,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { unsigned Reg = Use.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask LiveIn = Use.LaneMask & ~LiveMask; - if (LiveIn != 0) { + if (LiveIn.any()) { discoverLiveIn(RegisterMaskPair(Reg, LiveIn)); increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn); LiveRegs.insert(RegisterMaskPair(Reg, LiveIn)); @@ -879,7 +874,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { // Kill liveness at last uses. if (RequireIntervals) { LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); - if (LastUseMask != 0) { + if (LastUseMask.any()) { LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask)); decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask); } @@ -897,9 +892,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { bumpDeadDefs(RegOpers.DeadDefs); // Find the next instruction. 
- do - ++CurrPos; - while (CurrPos != MBB->end() && CurrPos->isDebugValue()); + CurrPos = skipDebugInstructionsForward(std::next(CurrPos), MBB->end()); } void RegPressureTracker::advance() { @@ -1192,8 +1185,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, unsigned SubRegIdx = MO.getSubReg(); LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx); LastUseMask &= ~UseMask; - if (LastUseMask == 0) - return 0; + if (LastUseMask.none()) + return LaneBitmask::getNone(); } } return LastUseMask; @@ -1202,7 +1195,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const { assert(RequireIntervals); - return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u, + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, + LaneBitmask::getAll(), [](const LiveRange &LR, SlotIndex Pos) { return LR.liveAt(Pos); }); @@ -1212,7 +1206,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const { assert(RequireIntervals); return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, - Pos.getBaseIndex(), 0, + Pos.getBaseIndex(), LaneBitmask::getNone(), [](const LiveRange &LR, SlotIndex Pos) { const LiveRange::Segment *S = LR.getSegmentContaining(Pos); return S != nullptr && S->end == Pos.getRegSlot(); @@ -1222,7 +1216,8 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const { assert(RequireIntervals); - return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u, + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, + LaneBitmask::getNone(), [](const LiveRange &LR, SlotIndex Pos) { const LiveRange::Segment *S = LR.getSegmentContaining(Pos); return S != nullptr && S->start < Pos.getRegSlot(true) && @@ -1253,7 +1248,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { for (const RegisterMaskPair &Use : RegOpers.Uses) { unsigned Reg = Use.RegUnit; LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); - if (LastUseMask == 0) + if (LastUseMask.none()) continue; // The LastUseMask is queried from the liveness information of instruction // which may be further down the schedule. Some lanes may actually not be @@ -1263,7 +1258,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { SlotIndex CurrIdx = getCurrSlot(); LastUseMask = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS); - if (LastUseMask == 0) + if (LastUseMask.none()) continue; LaneBitmask LiveMask = LiveRegs.contains(Reg); diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 6b80179..fdf741f 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -34,33 +34,12 @@ using namespace llvm; void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { LaneBitmask UnitMask = (*RUI).second; - if (UnitMask == 0 || (LaneMask & UnitMask) != 0) + if (UnitMask.none() || (LaneMask & UnitMask).any()) RegUnitsAvailable.reset((*RUI).first); } } -void RegScavenger::initRegState() { - for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { - I->Reg = 0; - I->Restore = nullptr; - } - - // All register units start out unused. 
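The tracker changes above (getCurrSlot, recedeSkipDebugValues, advance) replace hand-rolled loops over debug instructions with skipDebugInstructionsForward and skipDebugInstructionsBackward. A generic restatement of what those helpers do, using arbitrary iterators and a predicate instead of the real MachineBasicBlock declarations, so this conveys the idea rather than the actual API:

    // Generic restatement (not the real declarations): move forward past
    // elements matching IsDebug, but never past End.
    template <typename IterT, typename PredT>
    IterT skipForwardWhile(IterT It, IterT End, PredT IsDebug) {
      while (It != End && IsDebug(*It))
        ++It;
      return It;
    }

    // Backward variant: move toward Begin past matching elements; like the
    // original do/while loops, it stops at Begin even if Begin itself matches.
    template <typename IterT, typename PredT>
    IterT skipBackwardWhile(IterT It, IterT Begin, PredT IsDebug) {
      while (It != Begin && IsDebug(*It))
        --It;
      return It;
    }

In the tracker code the predicate role is played by MachineInstr::isDebugValue() and the iterators walk a MachineBasicBlock.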
- RegUnitsAvailable.set(); - - // Live-in registers are in use. - for (const auto &LI : MBB->liveins()) - setRegUsed(LI.PhysReg, LI.LaneMask); - - // Pristine CSRs are also unavailable. - const MachineFunction &MF = *MBB->getParent(); - BitVector PR = MF.getFrameInfo()->getPristineRegs(MF); - for (int I = PR.find_first(); I>0; I = PR.find_next(I)) - setRegUsed(I); -} - -void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { +void RegScavenger::init(MachineBasicBlock &MBB) { MachineFunction &MF = *MBB.getParent(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); @@ -69,11 +48,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) && "Target changed?"); - // It is not possible to use the register scavenger after late optimization - // passes that don't preserve accurate liveness information. - assert(MRI->tracksLiveness() && - "Cannot use register scavenger with inaccurate liveness"); - // Self-initialize. if (!this->MBB) { NumRegUnits = TRI->getNumRegUnits(); @@ -84,16 +58,56 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { } this->MBB = &MBB; - initRegState(); + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = nullptr; + } + + // All register units start out unused. + RegUnitsAvailable.set(); + + // Pristine CSRs are not available. + BitVector PR = MF.getFrameInfo().getPristineRegs(MF); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setRegUsed(I); Tracking = false; } +void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) { + for (const auto &LI : MBB.liveins()) + setRegUsed(LI.PhysReg, LI.LaneMask); +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { + init(MBB); + setLiveInsUsed(MBB); +} + +void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) { + init(MBB); + // Merge live-ins of successors to get live-outs. + for (const MachineBasicBlock *Succ : MBB.successors()) + setLiveInsUsed(*Succ); + + // Move internal iterator at the last instruction of the block. + if (MBB.begin() != MBB.end()) { + MBBI = std::prev(MBB.end()); + Tracking = true; + } +} + void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.set(*RUI); } +void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + BV.reset(*RUI); +} + void RegScavenger::determineKillsAndDefs() { assert(Tracking && "Must be tracking to determine kills and defs"); @@ -245,6 +259,48 @@ void RegScavenger::forward() { setUsed(DefRegUnits); } +void RegScavenger::backward() { + assert(Tracking && "Must be tracking to determine kills and defs"); + + const MachineInstr &MI = *MBBI; + // Defined or clobbered registers are available now. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; + ++RU) { + for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { + if (MO.clobbersPhysReg(*RURI)) { + RegUnitsAvailable.set(RU); + break; + } + } + } + } else if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + isReserved(Reg)) + continue; + addRegUnits(RegUnitsAvailable, Reg); + } + } + // Mark read registers as unavailable. 
+ for (const MachineOperand &MO : MI.uses()) { + if (MO.isReg() && MO.readsReg()) { + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + isReserved(Reg)) + continue; + removeRegUnits(RegUnitsAvailable, Reg); + } + } + + if (MBBI == MBB->begin()) { + MBBI = MachineBasicBlock::iterator(nullptr); + Tracking = false; + } else + --MBBI; +} + bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { if (includeReserved && isReserved(Reg)) return true; @@ -358,7 +414,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - Candidates.reset(MO.getReg()); + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); } // Try to find a register that's unused if there is one, as then we won't @@ -380,7 +437,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned NeedSize = RC->getSize(); unsigned NeedAlign = RC->getAlignment(); diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 5cf3e57..66f1966 100644 --- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; #define DEBUG_TYPE "ip-regalloc" -cl::opt<bool> DumpRegUsage( +static cl::opt<bool> DumpRegUsage( "print-regusage", cl::init(false), cl::Hidden, cl::desc("print register usage details collected for analysis.")); diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index ea952d9..2f7ee8b 100644 --- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -48,7 +48,7 @@ public: static char ID; RenameIndependentSubregs() : MachineFunctionPass(ID) {} - const char *getPassName() const override { + StringRef getPassName() const override { return "Rename Disconnected Subregister Components"; } @@ -184,7 +184,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes, unsigned MergedID = ~0u; for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) { const LiveInterval::SubRange &SR = *SRInfo.SR; - if ((SR.LaneMask & LaneMask) == 0) + if ((SR.LaneMask & LaneMask).none()) continue; SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber()) @@ -219,24 +219,23 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, if (!MO.isDef() && !MO.readsReg()) continue; - MachineInstr &MI = *MO.getParent(); - - SlotIndex Pos = LIS->getInstructionIndex(MI); + SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); + Pos = MO.isDef() ? 
Pos.getRegSlot(MO.isEarlyClobber()) + : Pos.getBaseIndex(); unsigned SubRegIdx = MO.getSubReg(); LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx); unsigned ID = ~0u; for (const SubRangeInfo &SRInfo : SubRangeInfos) { const LiveInterval::SubRange &SR = *SRInfo.SR; - if ((SR.LaneMask & LaneMask) == 0) + if ((SR.LaneMask & LaneMask).none()) continue; - LiveRange::const_iterator I = SR.find(Pos); - if (I == SR.end()) + const VNInfo *VNI = SR.getVNInfoAt(Pos); + if (VNI == nullptr) continue; - const VNInfo &VNI = *I->valno; // Map to local representant ID. - unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI); + unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI); // Global ID ID = Classes[LocalID + SRInfo.Index]; break; @@ -354,19 +353,24 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( if (I == 0) LI.clear(); LIS->constructMainRangeFromSubranges(LI); + // A def of a subregister may be a use of other register lanes. Replacing + // such a def with a def of a different register will eliminate the use, + // and may cause the recorded live range to be larger than the actual + // liveness in the program IR. + LIS->shrinkToUses(&LI); } } bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { // Skip renaming if liveness of subregister is not tracked. - if (!MF.getSubtarget().enableSubRegLiveness()) + MRI = &MF.getRegInfo(); + if (!MRI->subRegLivenessEnabled()) return false; DEBUG(dbgs() << "Renaming independent subregister live ranges in " << MF.getName() << '\n'); LIS = &getAnalysis<LiveIntervals>(); - MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); // Iterate over all vregs. Note that we query getNumVirtRegs() the newly diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp new file mode 100644 index 0000000..4519641 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -0,0 +1,67 @@ +//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a pass that will conditionally reset a machine +/// function as if it was just created. This is used to provide a fallback +/// mechanism when GlobalISel fails, thus the condition for the reset to +/// happen is that the MachineFunction has the FailedISel property. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "reset-machine-function" + +STATISTIC(NumFunctionsReset, "Number of functions reset"); + +namespace { + class ResetMachineFunction : public MachineFunctionPass { + /// Tells whether or not this pass should emit a fallback + /// diagnostic when it resets a function. 
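The new ResetMachineFunctionPass above defines the pass and its factory but not where a backend schedules it. A hedged sketch of the intended shape of the call site, assuming a TargetPassConfig-style instruction-selection pipeline; everything except createResetMachineFunctionPass is an invented name:

    // Hypothetical hook in some target's TargetPassConfig subclass. The
    // GlobalISel selector may mark the function FailedISel; the reset pass
    // then wipes the MachineFunction so the fallback selector can redo it.
    void MyPassConfig::addMyInstructionSelection() {
      addPass(createMyGlobalISelPass());                          // invented
      addPass(createResetMachineFunctionPass(/*EmitFallbackDiag=*/true));
      addPass(createMyFallbackSelectionDAGPass());                // invented
    }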
+ bool EmitFallbackDiag; + + public: + static char ID; // Pass identification, replacement for typeid + ResetMachineFunction(bool EmitFallbackDiag = false) + : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {} + + StringRef getPassName() const override { return "ResetMachineFunction"; } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) { + DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n'); + ++NumFunctionsReset; + MF.reset(); + if (EmitFallbackDiag) { + const Function &F = *MF.getFunction(); + DiagnosticInfoISelFallback DiagFallback(F); + F.getContext().diagnose(DiagFallback); + } + return true; + } + return false; + } + + }; +} // end anonymous namespace + +char ResetMachineFunction::ID = 0; +INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE, + "reset machine function if ISel failed", false, false) + +MachineFunctionPass * +llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false) { + return new ResetMachineFunction(EmitFallbackDiag); +} diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp index 4a1b995..2b82df2 100644 --- a/contrib/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp @@ -52,17 +52,6 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safestack" -enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP }; - -static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage", - cl::Hidden, cl::init(ThreadLocalUSP), - cl::desc("Type of storage for the unsafe stack pointer"), - cl::values(clEnumValN(ThreadLocalUSP, "thread-local", - "Thread-local storage"), - clEnumValN(SingleThreadUSP, "single-thread", - "Non-thread-local storage"), - clEnumValEnd)); - namespace llvm { STATISTIC(NumFunctions, "Total number of functions"); @@ -124,9 +113,6 @@ class SafeStack : public FunctionPass { /// might expect to appear on the stack on most common targets. enum { StackAlignment = 16 }; - /// \brief Build a value representing a pointer to the unsafe stack pointer. - Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F); - /// \brief Return the value of the stack canary. Value *getStackGuard(IRBuilder<> &IRB, Function &F); @@ -356,46 +342,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { return true; } -Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) { - // Check if there is a target-specific location for the unsafe stack pointer. - if (TL) - if (Value *V = TL->getSafeStackPointerLocation(IRB)) - return V; - - // Otherwise, assume the target links with compiler-rt, which provides a - // thread-local variable with a magic name. - Module &M = *F.getParent(); - const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; - auto UnsafeStackPtr = - dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar)); - - bool UseTLS = USPStorage == ThreadLocalUSP; - - if (!UnsafeStackPtr) { - auto TLSModel = UseTLS ? - GlobalValue::InitialExecTLSModel : - GlobalValue::NotThreadLocal; - // The global variable is not defined yet, define it ourselves. - // We use the initial-exec TLS model because we do not support the - // variable living anywhere other than in the main executable. - UnsafeStackPtr = new GlobalVariable( - M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, - UnsafeStackPtrVar, nullptr, TLSModel); - } else { - // The variable exists, check its type and attributes. 
- if (UnsafeStackPtr->getValueType() != StackPtrTy) - report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); - if (UseTLS != UnsafeStackPtr->isThreadLocal()) - report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + - (UseTLS ? "" : "not ") + "be thread-local"); - } - return UnsafeStackPtr; -} - Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { - Value *StackGuardVar = nullptr; - if (TL) - StackGuardVar = TL->getIRStackGuard(IRB); + Value *StackGuardVar = TL->getIRStackGuard(IRB); if (!StackGuardVar) StackGuardVar = F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); @@ -752,7 +700,9 @@ bool SafeStack::runOnFunction(Function &F) { return false; } - TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr; + if (!TM) + report_fatal_error("Target machine is required"); + TL = TM->getSubtargetImpl(F)->getTargetLowering(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); ++NumFunctions; @@ -764,7 +714,7 @@ bool SafeStack::runOnFunction(Function &F) { // Collect all points where stack gets unwound and needs to be restored // This is only necessary because the runtime (setjmp and unwind code) is - // not aware of the unsafe stack and won't unwind/restore it prorerly. + // not aware of the unsafe stack and won't unwind/restore it properly. // To work around this problem without changing the runtime, we insert // instrumentation to restore the unsafe stack pointer when necessary. SmallVector<Instruction *, 4> StackRestorePoints; @@ -786,7 +736,7 @@ bool SafeStack::runOnFunction(Function &F) { ++NumUnsafeStackRestorePointsFunctions; IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); - UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); + UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB); // Load the current stack pointer (we'll also use it as a base pointer). // FIXME: use a dedicated register for it ? diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp index 795eb8d..7fbeadd 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp @@ -214,10 +214,12 @@ void StackColoring::calculateLiveIntervals() { unsigned AllocaNo = It.second.AllocaNo; if (IsStart) { - assert(!Started.test(AllocaNo)); - Started.set(AllocaNo); - Ended.reset(AllocaNo); - Start[AllocaNo] = InstNo; + assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart); + if (!Started.test(AllocaNo)) { + Started.set(AllocaNo); + Ended.reset(AllocaNo); + Start[AllocaNo] = InstNo; + } } else { assert(!Ended.test(AllocaNo)); if (Started.test(AllocaNo)) { diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp index fb433c1..7d4dbd1 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -132,6 +132,14 @@ void StackLayout::computeLayout() { // If this is replaced with something smarter, it must preserve the property // that the first object is always at the offset 0 in the stack frame (for // StackProtectorSlot), or handle stack protector in some other way. + + // Sort objects by size (largest first) to reduce fragmentation. 
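The SafeStackLayout hunk that continues below implements exactly the rule stated in the comment above: leave the first object (reserved for the stack protector slot) where it is and place the remaining objects largest-first so small objects fill alignment gaps. The same rule in plain C++, with a simplified Obj type standing in for the layout's internal stack objects:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Obj { uint64_t Size; unsigned Alignment; };

// Keep element 0 in place, then order the rest by decreasing size. A stable
// sort preserves the original order among equally sized objects.
void sortForLayout(std::vector<Obj> &Objects) {
  if (Objects.size() > 2)
    std::stable_sort(Objects.begin() + 1, Objects.end(),
                     [](const Obj &A, const Obj &B) { return A.Size > B.Size; });
}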
+ if (StackObjects.size() > 2) + std::stable_sort(StackObjects.begin() + 1, StackObjects.end(), + [](const StackObject &a, const StackObject &b) { + return a.Size > b.Size; + }); + for (auto &Obj : StackObjects) layoutObject(Obj); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index efde61e..427d952 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -139,8 +139,7 @@ void SUnit::removePred(const SDep &D) { SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(), - N->Succs.end(), P); + SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P); assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); N->Succs.erase(Succ); Preds.erase(I); @@ -311,10 +310,20 @@ void SUnit::biasCriticalPath() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { + if (this == &DAG->ExitSU) + OS << "ExitSU"; + else if (this == &DAG->EntrySU) + OS << "EntrySU"; + else + OS << "SU(" << NodeNum << ")"; +} + /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { - dbgs() << "SU(" << NodeNum << "): "; + print(dbgs(), G); + dbgs() << ": "; G->dumpNode(this); } @@ -338,12 +347,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { I != E; ++I) { dbgs() << " "; switch (I->getKind()) { - case SDep::Data: dbgs() << "val "; break; - case SDep::Anti: dbgs() << "anti"; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ch "; break; + case SDep::Data: dbgs() << "data "; break; + case SDep::Anti: dbgs() << "anti "; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ord "; break; } - dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; + I->getSUnit()->print(dbgs(), G); if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); @@ -358,12 +367,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { I != E; ++I) { dbgs() << " "; switch (I->getKind()) { - case SDep::Data: dbgs() << "val "; break; - case SDep::Anti: dbgs() << "anti"; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ch "; break; + case SDep::Data: dbgs() << "data "; break; + case SDep::Anti: dbgs() << "anti "; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ord "; break; } - dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; + I->getSUnit()->print(dbgs(), G); if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 22bfd4d..611c5a7 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -77,7 +77,7 @@ static unsigned getReductionSize() { static void dumpSUList(ScheduleDAGInstrs::SUList &L) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << "{ "; - for (auto *su : L) { + for (const SUnit *su : L) { dbgs() << "SU(" << su->NodeNum << ")"; if (su != L.back()) dbgs() << ", "; @@ -142,9 +142,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector<Value *, 4> Objs; GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); - for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { - V = *I; + for (Value *V : Objs) { 
if (!Visited.insert(V).second) continue; if (Operator::getOpcode(V) == Instruction::IntToPtr) { @@ -164,7 +162,7 @@ static void getUnderlyingObjects(const Value *V, /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, + const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL) { auto allMMOsOkay = [&]() { @@ -178,16 +176,16 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, // overlapping locations. The client code calling this function assumes // this is not the case. So return a conservative answer of no known // object. - if (MFI->hasTailCall()) + if (MFI.hasTailCall()) return false; // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. - if (PSV->isAliased(MFI)) + if (PSV->isAliased(&MFI)) return false; - bool MayAlias = PSV->mayAlias(MFI); + bool MayAlias = PSV->mayAlias(&MFI); Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; @@ -249,32 +247,27 @@ void ScheduleDAGInstrs::exitRegion() { void ScheduleDAGInstrs::addSchedBarrierDeps() { MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; ExitSU.setInstr(ExitMI); - bool AllDepKnown = ExitMI && - (ExitMI->isCall() || ExitMI->isBarrier()); - if (ExitMI && AllDepKnown) { - // If it's a call or a barrier, add dependencies on the defs and uses of - // instruction. - for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = ExitMI->getOperand(i); + // Add dependencies on the defs and uses of the instruction. + if (ExitMI) { + for (const MachineOperand &MO : ExitMI->operands()) { if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - - if (TRI->isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - else if (MO.readsReg()) // ignore undef operands - addVRegUseDeps(&ExitSU, i); + } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); + } } - } else { + } + if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - assert(Uses.empty() && "Uses in set before adding deps?"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - for (const auto &LI : (*SI)->liveins()) { + for (const MachineBasicBlock *Succ : BB->successors()) { + for (const auto &LI : Succ->liveins()) { if (!Uses.contains(LI.PhysReg)) Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg)); } + } } } @@ -326,6 +319,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + // We do not need to track any dependencies for constant registers. + if (MRI.isConstantPhysReg(Reg)) + return; // Optionally add output and anti dependencies. 
For anti // dependencies we use a latency of 0 because for a multi-issue @@ -334,8 +331,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // TODO: Using a latency of 1 here for output dependencies assumes // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); - Alias.isValid(); ++Alias) { + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { @@ -362,13 +358,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + Uses.insert(PhysRegSUOper(SU, OperIdx, Reg)); if (RemoveKillFlags) MO.setIsKill(false); - } - else { + } else { addPhysRegDataDeps(SU, OperIdx); - unsigned Reg = MO.getReg(); // clear this register's use list if (Uses.contains(Reg)) @@ -404,7 +398,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const // No point in tracking lanemasks if we don't have interesting subregisters. const TargetRegisterClass &RC = *MRI.getRegClass(Reg); if (!RC.HasDisjunctSubRegs) - return ~0u; + return LaneBitmask::getAll(); unsigned SubReg = MO.getSubReg(); if (SubReg == 0) @@ -430,14 +424,14 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { DefLaneMask = getLaneMaskForMO(MO); // If we have a <read-undef> flag, none of the lane values comes from an // earlier instruction. - KillLaneMask = IsKill ? ~0u : DefLaneMask; + KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. MO.setIsUndef(false); } else { - DefLaneMask = ~0u; - KillLaneMask = ~0u; + DefLaneMask = LaneBitmask::getAll(); + KillLaneMask = LaneBitmask::getAll(); } if (MO.isDead()) { @@ -450,12 +444,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { E = CurrentVRegUses.end(); I != E; /*empty*/) { LaneBitmask LaneMask = I->LaneMask; // Ignore uses of other lanes. - if ((LaneMask & KillLaneMask) == 0) { + if ((LaneMask & KillLaneMask).none()) { ++I; continue; } - if ((LaneMask & DefLaneMask) != 0) { + if ((LaneMask & DefLaneMask).any()) { SUnit *UseSU = I->SU; MachineInstr *Use = UseSU->getInstr(); SDep Dep(SU, SDep::Data, Reg); @@ -467,7 +461,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { LaneMask &= ~KillLaneMask; // If we found a Def for all lanes of this use, remove it from the list. - if (LaneMask != 0) { + if (LaneMask.any()) { I->LaneMask = LaneMask; ++I; } else @@ -490,7 +484,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), CurrentVRegDefs.end())) { // Ignore defs for other lanes. - if ((V2SU.LaneMask & LaneMask) == 0) + if ((V2SU.LaneMask & LaneMask).none()) continue; // Add an output dependence. 
SUnit *DefSU = V2SU.SU; @@ -513,11 +507,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; V2SU.SU = SU; V2SU.LaneMask = OverlapMask; - if (NonOverlapMask != 0) + if (NonOverlapMask.any()) CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU)); } // If there was no CurrentVRegDefs entry for some lanes yet, create one. - if (LaneMask != 0) + if (LaneMask.any()) CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } @@ -533,7 +527,8 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { unsigned Reg = MO.getReg(); // Remember the use. Data dependencies will be added when we find the def. - LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) + : LaneBitmask::getAll(); CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); // Add antidependences to the following defs of the vreg. @@ -541,7 +536,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { CurrentVRegDefs.end())) { // Ignore defs for unrelated lanes. LaneBitmask PrevDefLaneMask = V2SU.LaneMask; - if ((PrevDefLaneMask & LaneMask) == 0) + if ((PrevDefLaneMask & LaneMask).none()) continue; if (V2SU.SU == SU) continue; @@ -554,7 +549,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA)); + (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); } /// This returns true if the two MIs need a chain edge between them. @@ -621,8 +616,8 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// Check whether two objects need a chain edge and add it if needed. void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, unsigned Latency) { - if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(), - SUb->getInstr())) { + if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(), + SUb->getInstr())) { SDep Dep(SUa, SDep::MayAliasMem); Dep.setLatency(Latency); SUb->addPred(Dep); @@ -668,10 +663,10 @@ void ScheduleDAGInstrs::initSUnits() { // within an out-of-order core. These are identified by BufferSize=1. if (SchedModel.hasInstrSchedModel()) { const MCSchedClassDesc *SC = getSchedClass(SU); - for (TargetSchedModel::ProcResIter - PI = SchedModel.getWriteProcResBegin(SC), - PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel.getWriteProcResBegin(SC), + SchedModel.getWriteProcResEnd(SC))) { + switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) { case 0: SU->hasReservedResource = true; break; @@ -686,44 +681,6 @@ void ScheduleDAGInstrs::initSUnits() { } } -void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg()) - continue; - if (!MO.readsReg()) - continue; - if (TrackLaneMasks && !MO.isUse()) - continue; - - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // Ignore re-defs. 
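Several hunks in ScheduleDAGInstrs.cpp (and the RenameIndependentSubregs one earlier) replace raw unsigned lane masks, compared against 0 and ~0u, with a LaneBitmask type queried through none(), any() and getAll(). A reduced model of such a wrapper, shown only to make the new spelling concrete; the real LLVM class differs in detail:

#include <cstdint>

// Explicit queries read better than comparing a raw integer against 0 or ~0u,
// and they keep the underlying width a private detail.
struct LaneMask {
  uint32_t Bits;
  static LaneMask getAll()  { return {~uint32_t(0)}; }
  static LaneMask getNone() { return {0u}; }
  bool none() const { return Bits == 0; }
  bool any()  const { return Bits != 0; }
  LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
  LaneMask operator~() const { return {~Bits}; }
};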
- if (TrackLaneMasks) { - bool FoundDef = false; - for (const MachineOperand &MO2 : MI->operands()) { - if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { - FoundDef = true; - break; - } - } - if (FoundDef) - continue; - } - - // Record this local VReg use. - VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; - } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, 0, SU)); - } -} - class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { /// Current total number of SUs in map. @@ -901,9 +858,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, CurrentVRegDefs.setUniverse(NumVirtRegs); CurrentVRegUses.setUniverse(NumVirtRegs); - VRegUses.clear(); - VRegUses.setUniverse(NumVirtRegs); - // Model data dependencies between instructions being scheduled and the // ExitSU. addSchedBarrierDeps(); @@ -926,8 +880,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { - collectVRegUses(SU); - RegisterOperands RegOpers; RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false); if (TrackLaneMasks) { @@ -957,12 +909,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - if (TRI->isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - else { + } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { HasVRegDef = true; addVRegDefDeps(SU, j); } @@ -977,13 +926,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - if (TRI->isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - else if (MO.readsReg()) // ignore undef operands + } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(SU, j); + } } // If we haven't seen any uses in this scheduling region, create a @@ -1023,7 +970,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } // If it's not a store or a variant load, we're done. - if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA))) + if (!MI.mayStore() && + !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) continue; // Always add dependecy edge to BarrierChain if present. @@ -1200,9 +1148,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { LiveRegs.reset(); // Examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (const auto &LI : (*SI)->liveins()) { + for (const MachineBasicBlock *Succ : BB->successors()) { + for (const auto &LI : Succ->liveins()) { // Repeat, for reg and all subregs. for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -1225,7 +1172,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, // might set it on too many operands. We will clear as many flags as we // can though. 
MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(*MI); + MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); while (Begin != End) { if (NewKillState) { if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) @@ -1312,6 +1259,11 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // register is used multiple times we only set the kill flag on // the first use. Don't set kill flags on undef operands. killedRegs.reset(); + + // toggleKillFlag can append new operands (implicit defs), so using + // a range-based loop is not safe. The new operands will be appended + // at the end of the operand list and they don't need to be visited, + // so iterating until the currently last operand is ok. for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; @@ -1337,13 +1289,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { if (MO.isKill() != kill) { DEBUG(dbgs() << "Fixing " << MO << " in "); - // Warning: toggleKillFlag may invalidate MO. toggleKillFlag(&MI, MO); DEBUG(MI.dump()); DEBUG({ if (MI.getOpcode() == TargetOpcode::BUNDLE) { MachineBasicBlock::instr_iterator Begin = MI.getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); while (++Begin != End) DEBUG(Begin->dump()); } @@ -1355,8 +1306,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // Mark any used register (that is not using undef) and subregs as // now live... - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; @@ -1457,13 +1407,12 @@ public: // the subtree limit, then try to join it now since splitting subtrees is // only useful if multiple high-pressure paths are possible. unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; - for (SUnit::const_pred_iterator - PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { - if (PI->getKind() != SDep::Data) + for (const SDep &PredDep : SU->Preds) { + if (PredDep.getKind() != SDep::Data) continue; - unsigned PredNum = PI->getSUnit()->NodeNum; + unsigned PredNum = PredDep.getSUnit()->NodeNum; if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) - joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + joinPredSubtree(PredDep, SU, /*CheckLimit=*/false); // Either link or merge the TreeData entry from the child to the parent. 
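The comment added to fixupKills above states a general rule worth remembering: when the loop body may append to the container being walked (toggleKillFlag can add implicit-def operands), iterate by index against a size captured before the loop rather than with a range-based for. A self-contained illustration:

#include <vector>

// Appending inside a range-based for invalidates its iterators once the
// vector reallocates. Indexing against a size captured up front is safe and,
// as in fixupKills, deliberately skips the freshly appended elements.
void appendWhileWalking(std::vector<int> &Work) {
  for (size_t I = 0, E = Work.size(); I != E; ++I)
    if (Work[I] % 2 != 0)
      Work.push_back(Work[I] * 10); // new entries land at indices >= E
}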
if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { @@ -1505,12 +1454,11 @@ public: R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); assert(SubtreeClasses.getNumClasses() == RootSet.size() && "number of roots should match trees"); - for (SparseSet<RootData>::const_iterator - RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { - unsigned TreeID = SubtreeClasses[RI->NodeID]; - if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) - R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; - R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + for (const RootData &Root : RootSet) { + unsigned TreeID = SubtreeClasses[Root.NodeID]; + if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount; // Note that SubInstrCount may be greater than InstrCount if we joined // subtrees across a cross edge. InstrCount will be attributed to the // original parent, while SubInstrCount will be attributed to the joined @@ -1524,14 +1472,12 @@ public: DEBUG(dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'); } - for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator - I = ConnectionPairs.begin(), E = ConnectionPairs.end(); - I != E; ++I) { - unsigned PredTree = SubtreeClasses[I->first->NodeNum]; - unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; + for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) { + unsigned PredTree = SubtreeClasses[P.first->NodeNum]; + unsigned SuccTree = SubtreeClasses[P.second->NodeNum]; if (PredTree == SuccTree) continue; - unsigned Depth = I->first->getDepth(); + unsigned Depth = P.first->getDepth(); addConnection(PredTree, SuccTree, Depth); addConnection(SuccTree, PredTree, Depth); } @@ -1553,9 +1499,8 @@ protected: // Four is the magic number of successors before a node is considered a // pinch point. 
unsigned NumDataSucs = 0; - for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), - SE = PredSU->Succs.end(); SI != SE; ++SI) { - if (SI->getKind() == SDep::Data) { + for (const SDep &SuccDep : PredSU->Succs) { + if (SuccDep.getKind() == SDep::Data) { if (++NumDataSucs >= 4) return false; } @@ -1575,10 +1520,9 @@ protected: do { SmallVectorImpl<SchedDFSResult::Connection> &Connections = R.SubtreeConnections[FromTree]; - for (SmallVectorImpl<SchedDFSResult::Connection>::iterator - I = Connections.begin(), E = Connections.end(); I != E; ++I) { - if (I->TreeID == ToTree) { - I->Level = std::max(I->Level, Depth); + for (SchedDFSResult::Connection &C : Connections) { + if (C.TreeID == ToTree) { + C.Level = std::max(C.Level, Depth); return; } } @@ -1617,9 +1561,9 @@ public: } // anonymous static bool hasDataSucc(const SUnit *SU) { - for (SUnit::const_succ_iterator - SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { - if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) + for (const SDep &SuccDep : SU->Succs) { + if (SuccDep.getKind() == SDep::Data && + !SuccDep.getSUnit()->isBoundaryNode()) return true; } return false; @@ -1632,15 +1576,13 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { llvm_unreachable("Top-down ILP metric is unimplemnted"); SchedDFSImpl Impl(*this); - for (ArrayRef<SUnit>::const_iterator - SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { - const SUnit *SU = &*SI; - if (Impl.isVisited(SU) || hasDataSucc(SU)) + for (const SUnit &SU : SUnits) { + if (Impl.isVisited(&SU) || hasDataSucc(&SU)) continue; SchedDAGReverseDFS DFS; - Impl.visitPreorder(SU); - DFS.follow(SU); + Impl.visitPreorder(&SU); + DFS.follow(&SU); for (;;) { // Traverse the leftmost path as far as possible. while (DFS.getPred() != DFS.getPredEnd()) { @@ -1676,13 +1618,11 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { /// connected to this tree, record the depth of the connection so that the /// nearest connected subtrees can be prioritized. void SchedDFSResult::scheduleTree(unsigned SubtreeID) { - for (SmallVectorImpl<Connection>::const_iterator - I = SubtreeConnections[SubtreeID].begin(), - E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { - SubtreeConnectLevels[I->TreeID] = - std::max(SubtreeConnectLevels[I->TreeID], I->Level); - DEBUG(dbgs() << " Tree: " << I->TreeID - << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); + for (const Connection &C : SubtreeConnections[SubtreeID]) { + SubtreeConnectLevels[C.TreeID] = + std::max(SubtreeConnectLevels[C.TreeID], C.Level); + DEBUG(dbgs() << " Tree: " << C.TreeID + << " @" << SubtreeConnectLevels[C.TreeID] << '\n'); } } diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 69c4870..83bc1ba 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -145,7 +145,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[StageCycle]; - // FALLTHROUGH + LLVM_FALLTHROUGH; case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. 
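The ScoreboardHazardRecognizer hunks above and just below replace bare "// FALLTHROUGH" comments with the LLVM_FALLTHROUGH macro so the intent is visible to the compiler and implicit-fallthrough warnings stay quiet. In portable C++17 the same annotation is spelled [[fallthrough]]; a small sketch with an invented Stage enum rather than the real InstrStage type:

// Required stages conflict with both reserved and required units, so the
// Required case deliberately falls through into the Reserved case.
enum class Stage { Required, Reserved };

unsigned maskFreeUnits(Stage S, unsigned Free, unsigned Required,
                       unsigned Reserved) {
  switch (S) {
  case Stage::Required:
    Free &= ~Reserved;   // also conflicts with reserved units
    [[fallthrough]];
  case Stage::Reserved:
    Free &= ~Required;   // conflicts only with required units
    break;
  }
  return Free;
}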
freeUnits &= ~RequiredScoreboard[StageCycle]; @@ -197,7 +197,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[cycle + i]; - // FALLTHROUGH + LLVM_FALLTHROUGH; case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. freeUnits &= ~RequiredScoreboard[cycle + i]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ecc6da..2c7bffe 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,14 +16,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -181,7 +182,7 @@ namespace { /// if things it uses can be simplified by bit propagation. /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -326,7 +327,7 @@ namespace { SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); - SDValue visitFMULForFMACombine(SDNode *N); + SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, @@ -334,12 +335,15 @@ namespace { SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue foldSelectOfConstants(SDNode *N); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); + SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans = true); @@ -356,6 +360,7 @@ namespace { SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); @@ -374,9 +379,14 @@ namespace { SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); + SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, 
ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -444,10 +454,11 @@ namespace { /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. - /// \return True if a merged store was created. - bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, - EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + /// \return number of stores that were merged into a merged store (always + /// a prefix of \p StoreNode). + bool MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. @@ -464,8 +475,10 @@ namespace { /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. - /// \return True if some memory operations were changed. - bool MergeConsecutiveStores(StoreSDNode *N); + /// \return number of stores that were merged into a merged store (the + /// affected nodes are stored as a prefix in \p StoreNodes). + bool MergeConsecutiveStores(StoreSDNode *N, + SmallVectorImpl<MemOpLink> &StoreNodes); /// \brief Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -536,10 +549,6 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorklist(N); } -void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { - ((DAGCombiner*)DC)->removeFromWorklist(N); -} - SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); @@ -620,7 +629,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros()) + return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; @@ -683,9 +693,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, LegalOperations, Depth+1), Op.getOperand(0), Flags); case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(Options.UnsafeFPMath); - // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) if (N0CFP->isZero()) @@ -726,6 +733,15 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } +// APInts must be the same size for most operations, this helper +// function zero extends the shorter of the pair so that they match. +// We provide an Offset so that we can create bitwidths that won't overflow. +static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { + unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); + LHS = LHS.zextOrSelf(Bits); + RHS = RHS.zextOrSelf(Bits); +} + // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. 
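The new zeroExtendToMatch helper above widens a pair of APInts to a common bit width, with optional headroom, before they are combined. The same idea on a toy value/width pair; WideInt is invented for the example and is not the APInt interface:

#include <algorithm>
#include <cstdint>

// Toy stand-in for an arbitrary-precision unsigned integer: a value plus the
// bit width it is considered to occupy.
struct WideInt {
  uint64_t Value;
  unsigned Width;
};

// Widen both operands to a common width, with optional extra headroom so a
// following add or shift cannot overflow the narrower operand's width. Zero
// extension leaves the stored unsigned value unchanged; only the width grows.
void zeroExtendToMatch(WideInt &L, WideInt &R, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(L.Width, R.Width);
  L.Width = Bits;
  R.Width = Bits;
}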
Also, set the incoming LHS, RHS, and CC references to @@ -775,42 +791,61 @@ static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } -// \brief Returns the SDNode if it is a constant splat BuildVector or constant -// int. -static ConstantSDNode *isConstOrConstSplat(SDValue N) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - - // BuildVectors can truncate their operands. Ignore that case here. - // FIXME: We blindly ignore splats which include undef which is overly - // pessimistic. - if (CN && UndefElements.none() && - CN->getValueType(0) == N.getValueType().getScalarType()) - return CN; +// Determines if it is a constant integer or a build vector of constant +// integers (and undefs). +// Do not permit build vector implicit truncation. +static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) + return !(Const->isOpaque() && NoOpaques); + if (N.getOpcode() != ISD::BUILD_VECTOR) + return false; + unsigned BitWidth = N.getScalarValueSizeInBits(); + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); + if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || + (Const->isOpaque() && NoOpaques)) + return false; } - - return nullptr; + return true; } -// \brief Returns the SDNode if it is a constant splat BuildVector or constant -// float. -static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) - return CN; +// Determines if it is a constant null integer or a splatted vector of a +// constant null integer (with no undefs). +// Build vector implicit truncation is not an issue for null values. +static bool isNullConstantOrNullSplatConstant(SDValue N) { + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isNullValue(); + return false; +} - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); +// Determines if it is a constant integer of one or a splatted vector of a +// constant integer of one (with no undefs). +// Do not permit build vector implicit truncation. +static bool isOneConstantOrOneSplatConstant(SDValue N) { + unsigned BitWidth = N.getScalarValueSizeInBits(); + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; + return false; +} - if (CN && UndefElements.none()) - return CN; - } +// Determines if it is a constant integer of all ones or a splatted vector of a +// constant integer of all ones (with no undefs). +// Do not permit build vector implicit truncation. +static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { + unsigned BitWidth = N.getScalarValueSizeInBits(); + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isAllOnesValue() && + Splat->getAPIntValue().getBitWidth() == BitWidth; + return false; +} - return nullptr; +// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with +// undef's. 
+static bool isAnyConstantBuildVector(const SDNode *N) { + return ISD::isBuildVectorOfConstantSDNodes(N) || + ISD::isBuildVectorOfConstantFPSDNodes(N); } SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, @@ -935,9 +970,9 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - SDLoc dl(Load); + SDLoc DL(Load); EVT VT = Load->getValueType(0); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); @@ -953,7 +988,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - SDLoc dl(Op); + SDLoc DL(Op); if (ISD::isUNINDEXEDLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); @@ -962,7 +997,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); } @@ -971,30 +1006,30 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { switch (Opc) { default: break; case ISD::AssertSext: - return DAG.getNode(ISD::AssertSext, dl, PVT, + return DAG.getNode(ISD::AssertSext, DL, PVT, SExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::AssertZext: - return DAG.getNode(ISD::AssertZext, dl, PVT, + return DAG.getNode(ISD::AssertZext, DL, PVT, ZExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - return DAG.getNode(ExtOpc, dl, PVT, Op); + return DAG.getNode(ExtOpc, DL, PVT, Op); } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) return SDValue(); - return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); + return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - SDLoc dl(Op); + SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) @@ -1003,13 +1038,13 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, DAG.getValueType(OldVT)); } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - SDLoc dl(Op); + SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) @@ -1018,7 +1053,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); - return DAG.getZeroExtendInReg(NewOp, dl, OldVT); + return DAG.getZeroExtendInReg(NewOp, DL, OldVT); } /// Promote the specified integer binary operation if the target indicates it is @@ -1072,9 +1107,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - SDLoc dl(Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Opc, dl, PVT, NN0, NN1)); + SDLoc DL(Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(Opc, DL, PVT, NN0, NN1)); } return SDValue(); } @@ -1119,9 +1154,9 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - SDLoc dl(Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + SDLoc DL(Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1))); } return SDValue(); } @@ -1178,7 +1213,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - SDLoc dl(Op); + SDLoc DL(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); @@ -1186,10 +1221,10 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); - SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); @@ -1315,7 +1350,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { continue; assert(N->getOpcode() != ISD::DELETED_NODE && - RV.getNode()->getOpcode() != ISD::DELETED_NODE && + RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); DEBUG(dbgs() << " ... 
into: "; @@ -1562,8 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if (Op.hasOneUse() && - std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { + if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. @@ -1571,7 +1605,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Changed = true; break; } - // Fall thru + LLVM_FALLTHROUGH; default: // Only add if it isn't already in the list. @@ -1634,6 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { @@ -1650,61 +1685,73 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (add x, undef) -> undef if (N0.isUndef()) return N0; + if (N1.isUndef()) return N1; + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // canonicalize constant to RHS if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), + N1.getNode()); } + // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; + // fold ((c1-A)+c2) -> (c1+c2)-A - if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { if (N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); + if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); } } + // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) + if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1)) return RADD; + // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); + if (N0.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); + if (N1.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N1.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); + // fold ((B-A)+A) -> B if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), 
N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, - N1.getOperand(0).getOperand(0), N1.getOperand(1)); + return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), + N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { @@ -1713,52 +1760,50 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); - if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } - if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullConstant(N1.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, + isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, + DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && - isNullConstant(N0.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, + isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, + DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); - unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. 
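Several of the visitADD folds above are plain modular-arithmetic identities, so they can be sanity-checked directly on fixed-width unsigned values:

#include <cassert>
#include <cstdint>

// Three of the visitADD folds, checked on unsigned (wrap-around) arithmetic.
void checkAddFolds(uint32_t A, uint32_t B, uint32_t C, unsigned N) {
  // fold (a+b) -> (a|b) iff a and b share no bits
  if ((A & B) == 0)
    assert(A + B == (A | B));
  // fold (A+(B-(A+C))) -> (B-C): exact under wrap-around arithmetic
  assert(A + (B - (A + C)) == B - C);
  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N < 32)
    assert(A + ((0u - C) << N) == A - (C << N));
}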
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { - SDLoc DL(N); + if (NumSignBits == DestBits && + isOneConstantOrOneSplatConstant(N1->getOperand(1))) return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); - } } // add (sext i1), X -> sub X, (zext i1) if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1767,7 +1812,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); @@ -1853,6 +1897,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { @@ -1867,62 +1912,97 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes); if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // fold (sub c1, c2) -> c1-c2 - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), + N1.getNode()); } - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + // fold (sub x, c) -> (add x, -c) if (N1C) { - SDLoc DL(N); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } + + if (isNullConstantOrNullSplatConstant(N0)) { + unsigned BitWidth = VT.getScalarSizeInBits(); + // Right-shifting everything out but the sign bit followed by negation is + // the same as flipping arithmetic/logical shift type without the negation: + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { + ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); + if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { + auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; + if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) + return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); + } + } + + // 0 - X --> 0 if the sub is NUW. + if (N->getFlags()->hasNoUnsignedWrap()) + return N0; + + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + // N1 is either 0 or the minimum signed value. If the sub is NSW, then + // N1 must be 0 because negating the minimum signed value is undefined. + if (N->getFlags()->hasNoSignedWrap()) + return N0; + + // 0 - X --> X if X is 0 or the minimum signed value. + return N1; + } + } + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) - if (isAllOnesConstant(N0)) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); + if (isAllOnesConstantOrAllOnesSplatConstant(N0)) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); + // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); + // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold C2-(A+C1) -> (C2-C1)-A - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); - if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { - SDLoc DL(N); - SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - DL, VT); - return DAG.getNode(ISD::SUB, DL, VT, NewC, - N1.getOperand(0)); + if (N1.getOpcode() == ISD::ADD) { + SDValue N11 = N1.getOperand(1); + if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && + isConstantOrConstantVector(N11, /* NoOpaques */ true)) { + SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); + } } + // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(1)); + return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C - if (N0.getOpcode() == ISD::ADD && - N0.getOperand(1).getOpcode() == ISD::ADD && + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B - if (N0.getOpcode() == ISD::SUB && - N0.getOperand(1).getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef if (N0.isUndef()) @@ -1937,19 +2017,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1C && GA->getOpcode() == ISD::GlobalAddress) return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - - (uint64_t)N1C->getSExtValue()); + (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - SDLoc(N), VT); + DL, VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); @@ -2048,7 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. 
bool IsFullSplat = - ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; @@ -2080,28 +2159,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } - APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1IsConst && N0.getOpcode() == ISD::SHL && - (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) { + if (N0.getOpcode() == ISD::SHL && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + if (isConstantOrConstantVector(C3)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { SDValue Sh(nullptr, 0), Y(nullptr, 0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && - (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1))) && + isConstantOrConstantVector(N0.getOperand(1)) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N1.getOperand(1)) && + isConstantOrConstantVector(N1.getOperand(1)) && N1.getNode()->hasOneUse()) { Sh = N1; Y = N0; } @@ -2188,8 +2266,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + UE = Op0.getNode()->use_end(); UI != UE;) { + SDNode *User = *UI++; if (User == Node || User->use_empty()) continue; // Convert the other matching node(s), too; @@ -2246,10 +2324,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); - } + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives @@ -2302,8 +2378,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Op; // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. - // Otherwise, we break the simplification logic in visitREM(). + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). 
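One small change in useDivRem above moves the increment out of the for-header and into the dereference (SDNode *User = *UI++). A plausible reading is that rewriting a matching user can disturb the use list being walked, so the iterator steps past the current use before anything can invalidate it; the same defensive pattern in ordinary C++:

#include <list>

// Advance the iterator before acting on the element it points at, so the
// action (here, erasing) cannot invalidate the iterator still in use.
void eraseOdd(std::list<int> &Values) {
  for (auto It = Values.begin(), End = Values.end(); It != End;) {
    auto Cur = It++;        // step past the element first
    if (*Cur % 2 != 0)
      Values.erase(Cur);    // safe: It no longer refers to *Cur
  }
}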
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; @@ -2337,25 +2413,33 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; + // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, - getShiftAmountTy(N0.getValueType()))); + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1)) { + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, - N1.getOperand(1), - DAG.getConstant(SHC->getAPIntValue() - .logBase2(), - DL, ADDVT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, DL, VT, N0, Add); - } + SDValue N10 = N1.getOperand(0); + if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N10)) { + SDValue LogBase2 = BuildLogBase2(N10, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } @@ -2366,8 +2450,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return Op; // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. - // Otherwise, we break the simplification logic in visitREM(). + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; @@ -2401,27 +2485,25 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (isSigned) { // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, DL, VT, N0, N1); - } + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + if (DAG.isKnownToBeAPowerOfTwo(N1)) { + APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); - if (SHC && SHC->getAPIntValue().isPowerOf2()) { - APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); - } + if (N1.getOpcode() == ISD::SHL && + DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { + APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } @@ -2477,8 +2559,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (isOneConstant(N1)) { SDLoc DL(N); return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueType().getSizeInBits() - 1, - DL, + DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 @@ -2706,7 +2787,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // Bail early if none of these transforms apply. 
- if (N0.getNode()->getNumOperands() == 0) return SDValue(); + if (N0.getNumOperands() == 0) return SDValue(); // For each of OP in AND/OR/XOR: // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) @@ -2872,25 +2953,34 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (isNullConstant(LR) && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } if (isAllOnesConstant(LR)) { // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } } } @@ -2899,14 +2989,17 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, Op0 == Op1 && LL.getValueType().isInteger() && Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { - SDLoc DL(N0); - SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), - LL, DAG.getConstant(1, DL, - LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, - DAG.getConstant(2, DL, LL.getValueType()), - ISD::SETUGE); + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { @@ -2967,6 +3060,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, unsigned Size = VT.getSizeInBits(); const APInt &AndMask = CAnd->getAPIntValue(); unsigned ShiftBits = CShift->getZExtValue(); + + // Bail out, this node will probably disappear anyway. 
+ if (ShiftBits == 0) + return SDValue(); + unsigned MaskBits = AndMask.countTrailingOnes(); EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); @@ -2985,7 +3083,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, // extended to handle extensions mixed in. SDValue SL(N0); - assert(ShiftBits != 0 && MaskBits <= Size); + assert(MaskBits <= Size); // Extracting the highest bit of the low half. EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); @@ -3050,6 +3148,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + // x & x --> x + if (N0 == N1) + return N0; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -3058,16 +3160,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant( - APInt::getNullValue( - N0.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N0.getValueType()); + return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant( - APInt::getNullValue( - N1.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N1.getValueType()); + return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -3078,7 +3176,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS @@ -3089,7 +3187,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 - unsigned BitWidth = VT.getScalarType().getSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); @@ -3098,14 +3196,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) - if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); - Mask = Mask.trunc(N0Op0.getValueSizeInBits()); + Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); @@ -3156,7 +3254,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. 
EVT VT = Vector->getValueType(0); - unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); // If the splat value has been compressed to a bitlength lower // than the size of the vector lane, we need to re-expand it to @@ -3187,8 +3285,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Resize the constant to the same size as the original memory access before // extension. If it is still the AllOnesValue then this AND is completely // unneeded. - Constant = - Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); bool B; switch (Load->getExtensionType()) { @@ -3230,9 +3327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) - if (N1C && (N0.getOpcode() == ISD::LOAD || - (N0.getOpcode() == ISD::ANY_EXTEND && - N0.getOperand(0).getOpcode() == ISD::LOAD))) { + if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; LoadSDNode *LN0 = HasAnyExt ? cast<LoadSDNode>(N0.getOperand(0)) @@ -3293,10 +3390,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; + // Masking the negated extension of a boolean is just the zero-extended + // boolean: + // and (sub 0, zext(bool X)), 1 --> zext(bool X) + // and (sub 0, sext(bool X)), 1 --> zext(bool X) + // + // Note: the SimplifyDemandedBits fold below can make an information-losing + // transform, and then we have no way to find this better fold. + if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { + ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0)); + SDValue SubRHS = N0.getOperand(1); + if (SubLHS && SubLHS->isNullValue()) { + if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return SubRHS; + if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); + } + } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) @@ -3305,9 +3421,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && + BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, @@ -3325,9 +3441,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && + BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, @@ -3391,8 +3507,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); - if (!N0.getNode()->hasOneUse() || - !N1.getNode()->hasOneUse()) + if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -3627,18 +3742,24 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } } } // canonicalize equivalent to ll == rl @@ -3708,6 +3829,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + // x | x --> x + if (N0 == N1) + return N0; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -3723,15 +3848,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (ISD::isBuildVectorAllOnes(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getConstant( - APInt::getAllOnesValue( - 
N0.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N0.getValueType()); + APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N), + N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( - APInt::getAllOnesValue( - N1.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N1.getValueType()); + APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N), + N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. @@ -4122,6 +4245,110 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return nullptr; } +namespace { +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; + } + + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { + bool IsIndexSignExt = false; + + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/PartialOffset, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); + } + + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // Look at Base + Index + Offset cases. 
+ SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); + } +}; +} // namespace + SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4317,16 +4544,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); if (!BinOpCst) return SDValue(); - // FIXME: disable this unless the input to the binop is a shift by a constant. - // If it is not a shift, it pessimizes some common cases like: - // - // void foo(int *X, int i) { X[i & 1235] = 1; } - // int bar(int *X, int i) { return X[i & 255]; } + // FIXME: disable this unless the input to the binop is a shift by a constant + // or is copy/select.Enable this in other cases when figure out it's exactly profitable. SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); - if ((BinOpLHSVal->getOpcode() != ISD::SHL && - BinOpLHSVal->getOpcode() != ISD::SRA && - BinOpLHSVal->getOpcode() != ISD::SRL) || - !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL || + BinOpLHSVal->getOpcode() == ISD::SRA || + BinOpLHSVal->getOpcode() == ISD::SRL; + bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg || + BinOpLHSVal->getOpcode() == ISD::SELECT; + + if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) && + !isCopyOrSelect) + return SDValue(); + + if (isCopyOrSelect && N->hasOneUse()) return SDValue(); EVT VT = N->getValueType(0); @@ -4366,19 +4597,15 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { SDValue N01 = N->getOperand(0).getOperand(1); - - if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - if (!N01C->isOpaque()) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); - SDLoc DL(N); - - return DAG.getNode(ISD::AND, DL, TruncVT, - DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), - DAG.getConstant(TruncC, DL, TruncVT)); - } + if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { + SDLoc DL(N); + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); + SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); + AddToWorklist(Trunc00.getNode()); + AddToWorklist(Trunc01.getNode()); + return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); } } 
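The distributeTruncateThroughAnd rewrite above is justified by truncation distributing over bitwise AND: (truncate (and x, c)) produces the same bits as (and (truncate x), (truncate c)). A minimal standalone sketch of that identity, assuming plain fixed-width integers (an illustrative i32-to-i16 truncation) in place of SDValues; none of these names or widths come from the patch:

    // Checks trunc(X & C) == trunc(X) & trunc(C) for an i32 -> i16 truncation.
    #include <cassert>
    #include <cstdint>

    static uint16_t truncOfAnd(uint32_t X, uint32_t C) {
      return static_cast<uint16_t>(X & C);        // (truncate (and X, C))
    }

    static uint16_t andOfTrunc(uint32_t X, uint32_t C) {
      return static_cast<uint16_t>(X) &
             static_cast<uint16_t>(C);            // (and (trunc X), (trunc C))
    }

    int main() {
      const uint32_t Xs[] = {0u, 1u, 0xFFFFu, 0x12345678u, 0xFFFFFFFFu};
      const uint32_t Cs[] = {0u, 0xFFu, 0xFF00u, 0xDEADBEEFu};
      for (uint32_t X : Xs)
        for (uint32_t C : Cs)
          assert(truncOfAnd(X, C) == andOfTrunc(X, C));
      return 0;
    }

Since the identity holds bit-for-bit, the combiner can emit two TRUNCATE nodes and a narrower AND whether the constant operand is a scalar constant or a constant vector, which is the generalization the new isConstantOrConstantVector check permits.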
@@ -4404,7 +4631,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; @@ -4425,12 +4651,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } - } else { - N1C = isConstOrConstSplat(N1); } } } + ConstantSDNode *N1C = isConstOrConstSplat(N1); + // fold (shl c1, c2) -> c1<<c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) @@ -4464,13 +4690,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SHL) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); SDLoc DL(N); - if (c1 + c2 >= OpSizeInBits) + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4485,18 +4716,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SHL) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - uint64_t c1 = N0Op0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); + APInt c1 = N0Op0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c2.uge(OpSizeInBits - InnerShiftSize)) { SDLoc DL(N0); - if (c1 + c2 >= OpSizeInBits) + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SHL, DL, VT, - DAG.getNode(N0.getOpcode(), DL, VT, - N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } } @@ -4508,8 +4743,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SRL) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - uint64_t c1 = N0Op0C1->getZExtValue(); - if (c1 < VT.getScalarSizeInBits()) { + if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) { + uint64_t c1 = N0Op0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); @@ -4569,37 +4804,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) && + isConstantOrConstantVector(N1, /* No Opaques */ true)) { unsigned BitSize = VT.getScalarSizeInBits(); SDLoc DL(N); - SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), - DL, VT); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), - 
HiBitsMask); + SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT); + SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. - APInt Val; - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isa<ConstantSDNode>(N0.getOperand(1)) || - ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isConstantOrConstantVector(N1, /* No Opaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + AddToWorklist(Shl0.getNode()); + AddToWorklist(Shl1.getNode()); return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) - if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - if (SDValue Folded = - DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); - } + if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() && + isConstantOrConstantVector(N1, /* No Opaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + if (isConstantOrConstantVector(Shl)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } if (N1C && !N1C->isOpaque()) @@ -4613,16 +4848,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + + // Arithmetic shifting an all-sign-bit value is a no-op. 
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) + return N0; // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N1C = isConstOrConstSplat(N1); - } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4634,8 +4871,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra -1, x) -> -1 if (isAllOnesConstant(N0)) return N0; - // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + // fold (sra x, c >= size(x)) -> undef + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4656,13 +4893,19 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { - unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) - Sum = OpSizeInBits - 1; + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum, DL, N1.getValueType())); + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) + Sum = APInt(OpSizeInBits, OpSizeInBits - 1); + + return DAG.getNode( + ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4759,16 +5002,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N1C = isConstOrConstSplat(N1); - } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4778,7 +5019,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4790,14 +5031,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { - if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N01C->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); - if (c1 + c2 >= OpSizeInBits) + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4810,7 +5056,7 @@ SDValue 
DAGCombiner::visitSRL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { SDLoc DL(N0); @@ -4825,14 +5071,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { - unsigned BitSize = N0.getScalarValueSizeInBits(); - if (BitSize <= 64) { - uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, DL, VT)); - } + if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + isConstantOrConstantVector(N1, /* NoOpaques */ true)) { + SDLoc DL(N); + APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); + SDValue Mask = + DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1); + AddToWorklist(Mask.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) @@ -5065,6 +5311,41 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +// TODO: We should handle other cases of selecting between {-1,0,1} here. +SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT CondVT = Cond.getValueType(); + SDLoc DL(N); + + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (VT.isInteger() && + (CondVT == MVT::i1 || (CondVT.isInteger() && + TLI.getBooleanContents(false, true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && + isNullConstant(N1) && isOneConstant(N2)) { + SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, + DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5080,39 +5361,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? 
N1 : N2; } - // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && isOneConstant(N1)) + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or C, Y) + if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); - // fold (select C, 0, 1) -> (xor C, 1) - // We can't do this reliably if integer based booleans have different contents - // to floating point based booleans. This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. - if (VT.isInteger() && - (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents(false, false) == - TLI.getBooleanContents(false, true) && - TLI.getBooleanContents(false, false) == - TargetLowering::ZeroOrOneBooleanContent)) && - isNullConstant(N1) && isOneConstant(N2)) { - SDValue XORNode; - if (VT == VT0) { - SDLoc DL(N); - return DAG.getNode(ISD::XOR, DL, VT0, - N0, DAG.getConstant(1, DL, VT0)); - } - SDLoc DL0(N0); - XORNode = DAG.getNode(ISD::XOR, DL0, VT0, - N0, DAG.getConstant(1, DL0, VT0)); - AddToWorklist(XORNode.getNode()); - if (VT.bitsGT(VT0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); - } + + if (SDValue V = foldSelectOfConstants(N)) + return V; + // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); @@ -5125,16 +5381,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } - // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && isNullConstant(N2)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); - // fold (select X, X, Y) -> (or X, Y) - // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) + if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. @@ -5145,7 +5394,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // The code in this block deals with the following 2 equivalences: // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) - // The target can specify its prefered form with the + // The target can specify its preferred form with the // shouldNormalizeToSelectSequence() callback. 
However we always transform // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further @@ -5214,6 +5463,18 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // select (xor Cond, 1), X, Y -> select Cond, Y, X + if (VT0 == MVT::i1) { + if (N0->getOpcode() == ISD::XOR) { + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { + SDValue Cond0 = N0->getOperand(0); + if (C->isOne()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), + Cond0, N2, N1); + } + } + } + // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y @@ -5269,7 +5530,7 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { - SDLoc dl(N); + SDLoc DL(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -5316,7 +5577,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function."); return DAG.getNode( - ISD::CONCAT_VECTORS, dl, VT, + ISD::CONCAT_VECTORS, DL, VT, BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } @@ -5390,6 +5651,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); SDValue Data = MST->getValue(); + EVT VT = Data.getValueType(); SDLoc DL(N); // If the MSTORE data type requires splitting and the mask is provided by a @@ -5399,16 +5661,13 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Mask.getOpcode() == ISD::SETCC) { // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); - SDValue Chain = MST->getChain(); SDValue Ptr = MST->getBasePtr(); @@ -5418,8 +5677,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = - (Alignment == Data->getValueType(0).getSizeInBits()/8) ? - Alignment/2 : Alignment; + (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -5433,11 +5691,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { Alignment, MST->getAAInfo(), MST->getRanges()); Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore()); + MST->isTruncatingStore(), + MST->isCompressingStore()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + MST->isCompressingStore()); MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MST->getPointerInfo(), @@ -5446,7 +5704,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore()); + MST->isTruncatingStore(), + MST->isCompressingStore()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5585,11 +5844,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -5597,7 +5855,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, MLD->isExpandingLoad()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5625,6 +5883,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); SDLoc DL(N); + // fold (vselect C, X, X) -> X + if (N1 == N2) + return N1; + // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> @@ -5648,7 +5910,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5803,7 +6065,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); - unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); SmallVector<SDValue, 8> Elts; unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); @@ -6026,7 +6288,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6038,9 +6300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. 
SDValue Op = N0.getOperand(0); - unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); - unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); - unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned OpBits = Op.getScalarValueSizeInBits(); + unsigned MidBits = N0.getScalarValueSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -6201,7 +6463,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) // Here, T can be 1 or -1, depending on the type of the setcc and // getBooleanContents(). - unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits(); + unsigned SetCCWidth = N0.getScalarValueSizeInBits(); SDLoc DL(N); // To determine the "true" side of the select, we need to know the high bit @@ -6323,7 +6585,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6338,7 +6600,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode *oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6528,7 +6790,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // elements we can use a matching integer vector type and then // truncate/sign extend. EVT MatchingElementType = EVT::getIntegerVT( - *DAG.getContext(), N00VT.getScalarType().getSizeInBits()); + *DAG.getContext(), N00VT.getScalarSizeInBits()); EVT MatchingVectorType = EVT::getVectorVT( *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = @@ -6558,8 +6820,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. - unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - - InnerZExt.getOperand(0).getValueType().getSizeInBits(); + unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - + InnerZExt.getOperand(0).getValueSizeInBits(); if (ShAmtVal > KnownZeroBits) return SDValue(); } @@ -6598,7 +6860,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. 
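Several of the sign-extension folds above hinge on the observation that a trunc/sext pair is a no-op when the value feeding the truncate already carries enough sign bits, which is what the ComputeNumSignBits check guards. A small sketch of that round-trip property, assuming an illustrative i32 -> i16 -> i32 path and two's-complement wrap on the narrowing cast (neither detail is taken from the patch):

    // (sext (trunc X)) for an illustrative i32 -> i16 -> i32 round trip.
    #include <cassert>
    #include <cstdint>

    static int32_t truncThenSext(int32_t X) {
      return static_cast<int32_t>(static_cast<int16_t>(X));
    }

    int main() {
      // Values already sign-extended from i16: the trunc/sext pair is a no-op.
      const int32_t InRange[] = {0, 1, -1, 32767, -32768};
      for (int32_t X : InRange)
        assert(truncThenSext(X) == X);

      // A value with fewer sign bits is not preserved, so the fold has to be
      // gated on the sign-bit count of the input.
      assert(truncThenSext(70000) != 70000);
      return 0;
    }
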
@@ -6625,15 +6887,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant && !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { + SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + X = DAG.getNode(ISD::TRUNCATE, DL, VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } @@ -6820,7 +7082,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); // Is the load width a multiple of size of VT? - if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) + if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } @@ -6952,8 +7214,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT EVT = cast<VTSDNode>(N1)->getVT(); - unsigned VTBits = VT.getScalarType().getSizeInBits(); - unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + unsigned VTBits = VT.getScalarSizeInBits(); + unsigned EVTBits = EVT.getScalarSizeInBits(); if (N0.isUndef()) return DAG.getUNDEF(VT); @@ -6977,14 +7239,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + if (N00.getScalarValueSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); + } + + // fold (sext_in_reg (zext x)) -> (sext x) + // iff we are extending the source sign bit. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getScalarValueSizeInBits() == EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) - return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -7111,6 +7382,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // If this is anyext(trunc), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) + return SDValue(); + // Fold extract-and-trunc into a narrow extract. 
For example: // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) // i32 y = TRUNCATE(i64 x) @@ -7148,7 +7423,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // trunc (select c, a, b) -> select c, (trunc a), (trunc b) - if (N0.getOpcode() == ISD::SELECT) { + if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { EVT SrcVT = N0.getValueType(); if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && TLI.isTruncateFree(SrcVT, VT)) { @@ -7160,15 +7435,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } - // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { uint64_t Amt = CAmt->getZExtValue(); - unsigned Size = VT.getSizeInBits(); + unsigned Size = VT.getScalarSizeInBits(); - if (Amt < Size / 2) { + if (Amt < Size) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7525,7 +7800,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { - unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); @@ -7848,10 +8123,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -7861,8 +8140,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && + N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8090,11 +8373,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. 
- if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8106,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -8221,8 +8511,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } -/// Try to perform FMA combining on a given FMUL node. -SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { +/// Try to perform FMA combining on a given FMUL node based on the distributive +/// law x * (y + 1) = x * y + x and variants thereof (commuted versions, +/// subtraction instead of addition). +SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -8231,17 +8523,23 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); const TargetOptions &Options = DAG.getTarget().Options; - bool AllowFusion = - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); - // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + // The transforms below are incorrect when x == 0 and y == inf, because the + // intermediate multiplication produces a nan. + if (!Options.NoInfsFPMath) + return SDValue(); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + // Floating-point multiply-add with intermediate rounding. This can result + // in a less precise result due to the changed rounding order. + bool HasFMAD = Options.UnsafeFPMath && + (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); @@ -8338,17 +8636,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); + // FIXME: Auto-upgrade the target/function-level option. + if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { + // fold (fadd A, 0) -> A + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; + } + // If 'unsafe math' is enabled, fold lots of things. 
if (Options.UnsafeFPMath) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); - // fold (fadd A, 0) -> A - if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) - if (N1C->isZero()) - return N0; - // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) @@ -8457,7 +8758,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); - SDLoc dl(N); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; @@ -8468,30 +8769,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return DAG.getNode(ISD::FADD, dl, VT, N0, + return DAG.getNode(ISD::FADD, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); - // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { - // (fsub A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; - + // FIXME: Auto-upgrade the target/function-level option. + if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, dl, VT, N1); + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } + } + + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) + return N0; // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, dl, VT); + return DAG.getConstantFP(0.0f, DL, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -8611,7 +8915,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // FMUL -> FMA combines: - if (SDValue Fused = visitFMULForFMACombine(N)) { + if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8626,14 +8930,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - SDLoc dl(N); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // Constant fold FMA. 
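The (fadd A, 0) and (fsub 0, B) folds relocated above are correct except for signed zeros, which is why they are now also allowed under the per-node NoSignedZeros flag. A quick IEEE-754 check of both corner cases (standalone sketch):

    #include <cstdio>
    int main() {
      float negzero = -0.0f;
      // (fadd A, 0) -> A is wrong for A == -0.0: the add produces +0.0.
      std::printf("%g\n", negzero + 0.0f);  // prints 0, not -0
      // (fsub 0, B) -> (fneg B) is wrong for B == +0.0:
      // 0.0 - 0.0 is +0.0, while fneg(+0.0) is -0.0.
      std::printf("%g\n", 0.0f - 0.0f);     // prints 0
      std::printf("%g\n", -(0.0f));         // prints -0
    }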
if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) && isa<ConstantFPSDNode>(N2)) { - return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } if (Options.UnsafeFPMath) { @@ -8663,8 +8967,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), &Flags), &Flags); } @@ -8672,9 +8976,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0.getOpcode() == ISD::FMUL && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { - return DAG.getNode(ISD::FMA, dl, VT, + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), &Flags), N2); } @@ -8685,32 +8989,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + return DAG.getNode(ISD::FADD, DL, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { - SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } } if (Options.UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT), - &Flags), &Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(1.0, DL, VT), &Flags), + &Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT), - &Flags), &Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(-1.0, DL, VT), &Flags), + &Flags); } } @@ -8720,7 +9024,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) -// Notice that this is not always beneficial. One reason is different target +// Notice that this is not always beneficial. One reason is different targets // may have different costs for FDIV and FMUL, so sometimes the cost of two // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". 
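The reciprocal comment above corresponds to this source-level shape (sketch; needs unsafe/fast FP math since a * (1/D) is not bit-identical to a / D, and it only pays off once enough divisions share the divisor):

    void divide_both(float a, float b, float D, float &x, float &y) {
      x = a / D;   // several divisions by the same D ...
      y = b / D;
      // ... may become:
      //   float recip = 1.0f / D;
      //   x = a * recip;
      //   y = b * recip;
    }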
@@ -8907,14 +9211,18 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + if (!DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + + SDValue N0 = N->getOperand(0); + if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); // TODO: FSQRT nodes should have flags that propagate to the created nodes. // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - return buildSqrtEstimate(N->getOperand(0), &Flags); + return buildSqrtEstimate(N0, &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -8941,11 +9249,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP) // Constant fold + if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { - const APFloat& V = N1CFP->getValueAPF(); + const APFloat &V = N1CFP->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { @@ -8963,8 +9271,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0.getOperand(0), N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) @@ -8972,14 +9279,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0, N1.getOperand(1)); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0, N1.getOperand(0)); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); } @@ -9159,7 +9464,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; - const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; // Skip this folding if it results in an fp_round from f80 to f16. // @@ -9232,7 +9537,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND - && N0.getNode()->getConstantOperandVal(1) == 1) { + && N0.getConstantOperandVal(1) == 1) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) @@ -9319,7 +9624,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... 
@@ -9424,7 +9729,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... @@ -10103,7 +10408,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT - if (ISD::isNormalLoad(N) && !LD->isVolatile()) { + if (OptLevel != CodeGenOpt::None && + ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast<StoreSDNode>(Chain); if (PrevST->getBasePtr() == Ptr && @@ -10405,7 +10711,7 @@ struct LoadedSlice { assert(Inst && Origin && "Unable to replace a non-existing slice."); const SDValue &OldBaseAddr = Origin->getBasePtr(); SDValue BaseAddr = OldBaseAddr; - // Get the offset in that chunk of bytes w.r.t. the endianess. + // Get the offset in that chunk of bytes w.r.t. the endianness. int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); assert(Offset >= 0 && "Offset too big to fit in int64_t!"); if (Offset) { @@ -10705,7 +11011,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { LSIt != LSItEnd; ++LSIt) { SDValue SliceInst = LSIt->loadSlice(); CombineTo(LSIt->Inst, SliceInst, true); - if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + if (SliceInst.getOpcode() != ISD::LOAD) SliceInst = SliceInst.getOperand(0); assert(SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!"); @@ -11033,110 +11339,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -namespace { -/// Helper struct to parse and store a memory address as base + index + offset. -/// We ignore sign extensions when it is safe to do so. -/// The following two expressions are not equivalent. To differentiate we need -/// to store whether there was a sign extension involved in the index -/// computation. -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (add (i8 load %index) -/// (i8 1)))) -/// vs -/// -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) -/// (i32 1))))) -struct BaseIndexOffset { - SDValue Base; - SDValue Index; - int64_t Offset; - bool IsIndexSignExt; - - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - - BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, - bool IsIndexSignExt) : - Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} - - bool equalBaseIndex(const BaseIndexOffset &Other) { - return Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt; - } - - /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { - bool IsIndexSignExt = false; - - // Split up a folded GlobalAddress+Offset into its component parts. - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) - if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { - return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), - SDLoc(GA), - GA->getValueType(0), - /*Offset=*/0, - /*isTargetGA=*/false, - GA->getTargetFlags()), - SDValue(), - GA->getOffset(), - IsIndexSignExt); - } - - // We only can pattern match BASE + INDEX + OFFSET. 
If Ptr is not an ADD - // instruction, then it could be just the BASE or everything else we don't - // know how to handle. Just use Ptr as BASE and give up. - if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // We know that we have at least an ADD instruction. Try to pattern match - // the simple case of BASE + OFFSET. - if (isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); - } - - // Inside a loop the current BASE pointer is calculated using an ADD and a - // MUL instruction. In this case Ptr is the actual BASE pointer. - // (i64 add (i64 %array_ptr) - // (i64 mul (i64 %induction_var) - // (i64 %element_size))) - if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // Look at Base + Index + Offset cases. - SDValue Base = Ptr->getOperand(0); - SDValue IndexOffset = Ptr->getOperand(1); - - // Skip signextends. - if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { - IndexOffset = IndexOffset->getOperand(0); - IsIndexSignExt = true; - } - - // Either the case of Base + Index (no offset) or something else. - if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); - - // Now we have the case of Base + Index + offset. - SDValue Index = IndexOffset->getOperand(0); - SDValue Offset = IndexOffset->getOperand(1); - - if (!isa<ConstantSDNode>(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // Ignore signextends. - if (Index->getOpcode() == ISD::SIGN_EXTEND) { - Index = Index->getOperand(0); - IsIndexSignExt = true; - } else IsIndexSignExt = false; - - int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); - } -}; -} // namespace - // This is a helper function for visitMUL to check the profitability // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). // MulNode is the original multiply, AddNode is (add x, c1), @@ -11351,6 +11553,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( } } + StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end()); return true; } @@ -11493,7 +11696,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { +bool DAGCombiner::MergeConsecutiveStores( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) { if (OptLevel == CodeGenOpt::None) return false; @@ -11537,16 +11741,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // any of the store nodes. SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - // Save the StoreSDNodes that we find in the chain. - SmallVector<MemOpLink, 8> StoreNodes; - getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // only do dep endence check in AA case + // only do dependence check in AA case bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) @@ -11582,10 +11783,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Check if this store interferes with any of the loads that we found. // If we find a load that alias with this store. Stop the sequence. 
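For reference, the BaseIndexOffset matcher deleted above decomposed a pointer expression into base + index + constant offset; two stores were merge candidates only when Base, Index and the sign-extension flag all matched and just the offset differed. An illustrative decomposition:

    (add %base, (add (sext i8 %idx to i64), 16))
        Base   = %base
        Index  = %idx        ; IsIndexSignExt = true
        Offset = 16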
- if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), - [&](LSBaseSDNode* Ldn) { - return isAlias(Ldn, StoreNodes[i].MemNode); - })) + if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. @@ -11899,6 +12099,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } } + StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end()); return true; } @@ -12088,11 +12289,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = - GetDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits())); + SDValue Shorter = GetDemandedBits( + Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + ST->getMemoryVT().getScalarSizeInBits())); AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, @@ -12100,10 +12299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. - if (SimplifyDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits()))) + if (SimplifyDemandedBits( + Value, + APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + ST->getMemoryVT().getScalarSizeInBits()))) return SDValue(N, 0); } @@ -12144,19 +12343,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. if (!LegalTypes) { - bool EverChanged = false; - - do { + for (;;) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. - bool Changed = MergeConsecutiveStores(ST); - EverChanged |= Changed; + SmallVector<MemOpLink, 8> StoreNodes; + bool Changed = MergeConsecutiveStores(ST, StoreNodes); if (!Changed) break; - } while (ST->getOpcode() != ISD::DELETED_NODE); - if (EverChanged) - return SDValue(N, 0); + if (any_of(StoreNodes, + [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) { + // ST has been merged and no longer exists. + return SDValue(N, 0); + } + } } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -12169,14 +12369,123 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return NewSt; } + if (SDValue NewSt = splitMergedValStore(ST)) + return NewSt; + return ReduceLoadOpStoreWidth(N); } +/// For the instruction sequence of store below, F and I values +/// are bundled together as an i64 value before being stored into memory. +/// Sometimes it is more efficent to generate separate stores for F and I, +/// which can remove the bitwise instructions or sink them to colder places. +/// +/// (store (or (zext (bitcast F to i32) to i64), +/// (shl (zext I to i64), 32)), addr) --> +/// (store F, addr) and (store I, addr+4) +/// +/// Similarly, splitting for other merged store can also be beneficial, like: +/// For pair of {i32, i32}, i64 store --> two i32 stores. +/// For pair of {i32, i16}, i64 store --> two i32 stores. +/// For pair of {i16, i16}, i32 store --> two i16 stores. 
+/// For pair of {i16, i8}, i32 store --> two i16 stores. +/// For pair of {i8, i8}, i16 store --> two i8 stores. +/// +/// We allow each target to determine specifically which kind of splitting is +/// supported. +/// +/// The store patterns are commonly seen from the simple code snippet below +/// if only std::make_pair(...) is sroa transformed before inlined into hoo. +/// void goo(const std::pair<int, float> &); +/// hoo() { +/// ... +/// goo(std::make_pair(tmp, ftmp)); +/// ... +/// } +/// +SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { + if (OptLevel == CodeGenOpt::None) + return SDValue(); + + SDValue Val = ST->getValue(); + SDLoc DL(ST); + + // Match OR operand. + if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR) + return SDValue(); + + // Match SHL operand and get Lower and Higher parts of Val. + SDValue Op1 = Val.getOperand(0); + SDValue Op2 = Val.getOperand(1); + SDValue Lo, Hi; + if (Op1.getOpcode() != ISD::SHL) { + std::swap(Op1, Op2); + if (Op1.getOpcode() != ISD::SHL) + return SDValue(); + } + Lo = Op2; + Hi = Op1.getOperand(0); + if (!Op1.hasOneUse()) + return SDValue(); + + // Match shift amount to HalfValBitSize. + unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1)); + if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize) + return SDValue(); + + // Lo and Hi are zero-extended from int with size less equal than 32 + // to i64. + if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || + !Lo.getOperand(0).getValueType().isScalarInteger() || + Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || + Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || + !Hi.getOperand(0).getValueType().isScalarInteger() || + Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) + return SDValue(); + + // Use the EVT of low and high parts before bitcast as the input + // of target query. + EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) + ? Lo.getOperand(0).getValueType() + : Lo.getValueType(); + EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) + ? Hi.getOperand(0).getValueType() + : Hi.getValueType(); + if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return SDValue(); + + // Start to split store. + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + // Change the sizes of Lo and Hi's value types to HalfValBitSize. + EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); + Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + // Lower value store. + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); + Ptr = + DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + // Higher value store. + SDValue St1 = + DAG.getStore(St0, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), + Alignment / 2, MMOFlags, AAInfo); + return St1; +} + SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); - SDLoc dl(N); + SDLoc DL(N); // If the inserted element is an UNDEF, just use the input vector. 
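A self-contained version of the motivating snippet from the splitMergedValStore comment (goo/hoo are the names used in that comment; everything else is a sketch):

    #include <utility>
    void goo(const std::pair<int, float> &);
    void hoo(int tmp, float ftmp) {
      // If only make_pair is SROA'd before being inlined here, the two 32-bit
      // members are zero-extended to i64, one is shifted left by 32, and the
      // OR of the two is written with a single i64 store -- the
      // (or (zext ...), (shl (zext ...), 32)) pattern that
      // splitMergedValStore now splits back into two 32-bit stores when the
      // target reports that as cheaper.
      goo(std::make_pair(tmp, ftmp));
    }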
if (InVal.isUndef()) @@ -12206,7 +12515,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); if (Elt < OtherElt) { // Swap nodes. - SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InVec.getOperand(0), InVal, EltNo); AddToWorklist(NewOp.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), @@ -12237,13 +12546,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT OpVT = Ops[0].getValueType(); if (InVal.getValueType() != OpVT) InVal = OpVT.bitsGT(InVal.getValueType()) ? - DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : - DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); + DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : + DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); Ops[Elt] = InVal; } // Return the new vector - return DAG.getBuildVector(VT, dl, Ops); + return DAG.getBuildVector(VT, DL, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( @@ -12544,7 +12853,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); + SDLoc DL(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -12603,7 +12912,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SDLoc(N), SourceType); + DAG.getConstant(0, DL, SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NewBVElems, Filler); @@ -12634,7 +12943,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getBuildVector(VecVT, dl, Ops); + SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); @@ -12646,7 +12955,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); + SDLoc DL(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -12707,30 +13016,126 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getBuildVector(NVT, dl, Opnds); + SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); AddToWorklist(BV.getNode()); - return DAG.getNode(Opcode, dl, VT, BV); + return DAG.getNode(Opcode, DL, VT, BV); } -SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { - unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); +SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, + ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); + EVT VT = N->getValueType(0); + EVT InVT1 = VecIn1.getValueType(); + EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; + + unsigned Vec2Offset = InVT1.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned ShuffleNumElems = NumElems; + + // We can't generate a shuffle node with mismatched input and output types. 
+ // Try to make the types match the type of the output. + if (InVT1 != VT || InVT2 != VT) { + if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { + // If the output vector length is a multiple of both input lengths, + // we can concatenate them and pad the rest with undefs. + unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); + assert(NumConcats >= 2 && "Concat needs at least two inputs!"); + SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); + ConcatOps[0] = VecIn1; + ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1); + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + VecIn2 = SDValue(); + } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { + if (!TLI.isExtractSubvectorCheap(VT, NumElems)) + return SDValue(); - // A vector built entirely of undefs is undef. - if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(VT); + if (!VecIn2.getNode()) { + // If we only have one input vector, and it's twice the size of the + // output, split it in two. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, + DAG.getConstant(NumElems, DL, IdxTy)); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); + // Since we now have shorter input vectors, adjust the offset of the + // second vector's start. + Vec2Offset = NumElems; + } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { + // VecIn1 is wider than the output, and we have another, possibly + // smaller input. Pad the smaller input with undefs, shuffle at the + // input vector width, and extract the output. + // The shuffle type is different than VT, so check legality again. + if (LegalOperations && + !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) + return SDValue(); - if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) - return V; + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. + if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); + VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, + DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } + ShuffleNumElems = NumElems * 2; + } else { + // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider + // than VecIn1. We can't handle this for now - this case will disappear + // when we start sorting the vectors by type. + return SDValue(); + } + } else { + // TODO: Support cases where the length mismatch isn't exactly by a + // factor of 2. + // TODO: Move this check upwards, so that if we have bad type + // mismatches, we don't create any DAG nodes. + return SDValue(); + } + } - if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) - return V; + // Initialize mask to undef. + SmallVector<int, 8> Mask(ShuffleNumElems, -1); + + // Only need to run up to the number of elements actually used, not the + // total number of elements in the shuffle - if we are shuffling a wider + // vector, the high lanes should be set to undef. + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] <= 0) + continue; + + unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); + if (VectorMask[i] == (int)LeftIdx) { + Mask[i] = ExtIndex; + } else if (VectorMask[i] == (int)LeftIdx + 1) { + Mask[i] = Vec2Offset + ExtIndex; + } + } + + // The type the input vectors may have changed above. + InVT1 = VecIn1.getValueType(); + + // If we already have a VecIn2, it should have the same type as VecIn1. 
+ // If we don't, get an undef/zero vector of the appropriate type. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1); + assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); + + SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); + if (ShuffleNumElems > NumElems) + Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); - // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT - // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from - // at most two distinct vectors, turn this into a shuffle node. + return Shuffle; +} + +// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations. If the types of the vectors we're extracting from allow it, +// turn this into a vector_shuffle node. +SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) @@ -12740,149 +13145,169 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); - SDValue VecIn1, VecIn2; bool UsesZeroVector = false; - for (unsigned i = 0; i != NumInScalars; ++i) { + unsigned NumElems = N->getNumOperands(); + + // Record, for each element of the newly built vector, which input vector + // that element comes from. -1 stands for undef, 0 for the zero vector, + // and positive values for the input vectors. + // VectorMask maps each element to its vector number, and VecIn maps vector + // numbers to their initial SDValues. + + SmallVector<int, 8> VectorMask(NumElems, -1); + SmallVector<SDValue, 8> VecIn; + VecIn.push_back(SDValue()); + + for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = N->getOperand(i); - // Ignore undef inputs. - if (Op.isUndef()) continue; - // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { + if (Op.isUndef()) + continue; + + // See if we can use a blend with a zero vector. + // TODO: Should we generalize this to a blend with an arbitrary constant + // vector? + if (isNullConstant(Op) || isNullFPConstant(Op)) { UsesZeroVector = true; + VectorMask[i] = 0; continue; } - // If this input is something other than a EXTRACT_VECTOR_ELT with a - // constant index, bail out. + // Not an undef or zero. If the input is something other than an + // EXTRACT_VECTOR_ELT with a constant index, bail out. if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(Op.getOperand(1))) { - VecIn1 = VecIn2 = SDValue(nullptr, 0); - break; - } + !isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); - // We allow up to two distinct input vectors. SDValue ExtractedFromVec = Op.getOperand(0); - if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) - continue; - if (!VecIn1.getNode()) { - VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode() && !UsesZeroVector) { - VecIn2 = ExtractedFromVec; - } else { - // Too many inputs. - VecIn1 = VecIn2 = SDValue(nullptr, 0); - break; - } - } - - // If everything is good, we can make a shuffle operation. 
- if (VecIn1.getNode()) { - unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); - SmallVector<int, 8> Mask; - for (unsigned i = 0; i != NumInScalars; ++i) { - unsigned Opcode = N->getOperand(i).getOpcode(); - if (Opcode == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } + // All inputs must have the same element type as the output. + if (VT.getVectorElementType() != + ExtractedFromVec.getValueType().getVectorElementType()) + return SDValue(); - // Operands can also be zero. - if (Opcode != ISD::EXTRACT_VECTOR_ELT) { - assert(UsesZeroVector && - (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && - "Unexpected node found!"); - Mask.push_back(NumInScalars+i); - continue; - } + // Have we seen this input vector before? + // The vectors are expected to be tiny (usually 1 or 2 elements), so using + // a map back from SDValues to numbers isn't worth it. + unsigned Idx = std::distance( + VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); + if (Idx == VecIn.size()) + VecIn.push_back(ExtractedFromVec); - // If extracting from the first vector, just use the index directly. - SDValue Extract = N->getOperand(i); - SDValue ExtVal = Extract.getOperand(1); - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - if (Extract.getOperand(0) == VecIn1) { - Mask.push_back(ExtIndex); - continue; - } + VectorMask[i] = Idx; + } - // Otherwise, use InIdx + InputVecSize - Mask.push_back(InNumElements + ExtIndex); - } + // If we didn't find at least one input vector, bail out. + if (VecIn.size() < 2) + return SDValue(); - // Avoid introducing illegal shuffles with zero. - if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + // TODO: We want to sort the vectors by descending length, so that adjacent + // pairs have similar length, and the longer vector is always first in the + // pair. + + // TODO: Should this fire if some of the input vectors has illegal type (like + // it does now), or should we let legalization run its course first? + + // Shuffle phase: + // Take pairs of vectors, and shuffle them so that the result has elements + // from these vectors in the correct places. + // For example, given: + // t10: i32 = extract_vector_elt t1, Constant:i64<0> + // t11: i32 = extract_vector_elt t2, Constant:i64<0> + // t12: i32 = extract_vector_elt t3, Constant:i64<0> + // t13: i32 = extract_vector_elt t1, Constant:i64<1> + // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 + // We will generate: + // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 + // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef + SmallVector<SDValue, 4> Shuffles; + for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { + unsigned LeftIdx = 2 * In + 1; + SDValue VecLeft = VecIn[LeftIdx]; + SDValue VecRight = + (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue(); + + if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, + VecRight, LeftIdx)) + Shuffles.push_back(Shuffle); + else return SDValue(); + } - // We can't generate a shuffle node with mismatched input and output types. - // Attempt to transform a single input vector to the correct type. - if ((VT != VecIn1.getValueType())) { - // If the input vector type has a different base type to the output - // vector type, bail out. 
- EVT VTElemType = VT.getVectorElementType(); - if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || - (VecIn2.getNode() && - (VecIn2.getValueType().getVectorElementType() != VTElemType))) - return SDValue(); + // If we need the zero vector as an "ingredient" in the blend tree, add it + // to the list of shuffles. + if (UsesZeroVector) + Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) + : DAG.getConstantFP(0.0, DL, VT)); - // If the input vector is too small, widen it. - // We only support widening of vectors which are half the size of the - // output registers. For example XMM->YMM widening on X86 with AVX. - EVT VecInT = VecIn1.getValueType(); - if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { - // If we only have one small input, widen it by adding undef values. - if (!VecIn2.getNode()) - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, - DAG.getUNDEF(VecIn1.getValueType())); - else if (VecIn1.getValueType() == VecIn2.getValueType()) { - // If we have two small inputs of the same type, try to concat them. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); - VecIn2 = SDValue(nullptr, 0); - } else - return SDValue(); - } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { - // If the input vector is too large, try to split it. - // We don't support having two input vectors that are too large. - // If the zero vector was used, we can not split the vector, - // since we'd need 3 inputs. - if (UsesZeroVector || VecIn2.getNode()) - return SDValue(); + // If we only have one shuffle, we're done. + if (Shuffles.size() == 1) + return Shuffles[0]; - if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) - return SDValue(); + // Update the vector mask to point to the post-shuffle vectors. + for (int &Vec : VectorMask) + if (Vec == 0) + Vec = Shuffles.size() - 1; + else + Vec = (Vec - 1) / 2; + + // More than one shuffle. Generate a binary tree of blends, e.g. if from + // the previous step we got the set of shuffles t10, t11, t12, t13, we will + // generate: + // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 + // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4 + // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6 + // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8 + // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 + // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13 + // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 + + // Make sure the initial size of the shuffle list is even. + if (Shuffles.size() % 2) + Shuffles.push_back(DAG.getUNDEF(VT)); + + for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { + if (CurSize % 2) { + Shuffles[CurSize] = DAG.getUNDEF(VT); + CurSize++; + } + for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { + int Left = 2 * In; + int Right = 2 * In + 1; + SmallVector<int, 8> Mask(NumElems, -1); + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] == Left) { + Mask[i] = i; + VectorMask[i] = In; + } else if (VectorMask[i] == Right) { + Mask[i] = i + NumElems; + VectorMask[i] = In; + } + } - // Try to replace VecIn1 with two extract_subvectors - // No need to update the masks, they should still be correct. 
- VecIn2 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); - VecIn1 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - } else - return SDValue(); + Shuffles[In] = + DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); } + } - if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : - DAG.getConstantFP(0.0, dl, VT); - else - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return Shuffles[0]; +} - // Check that we were able to transform all incoming values to the same - // type. - if (VecIn2.getValueType() != VecIn1.getValueType() || - VecIn1.getValueType() != VT) - return SDValue(); +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + EVT VT = N->getValueType(0); - // Return the new VECTOR_SHUFFLE node. - SDValue Ops[2]; - Ops[0] = VecIn1; - Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask); - } + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecToShuffle(N)) + return V; return SDValue(); } @@ -13071,8 +13496,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) return SDValue(); - SDLoc dl = SDLoc(N); - SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); return DAG.getBitcast(VT, Res); } } @@ -13208,7 +13632,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - SDLoc dl(N); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); @@ -13228,11 +13651,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Into: // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == - ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) return DAG.getBitcast(NVT, V->getOperand(1)); return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NVT, + ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), N->getOperand(1)); } @@ -13391,6 +13814,84 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } +// Attempt to combine a shuffle of 2 inputs of 'scalar sources' - +// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. +// +// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always +// a simplification in some sense, but it isn't appropriate in general: some +// BUILD_VECTORs are substantially cheaper than others. The general case +// of a BUILD_VECTOR requires inserting each element individually (or +// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of +// all constants is a single constant pool load. 
A BUILD_VECTOR where each +// element is identical is a splat. A BUILD_VECTOR where most of the operands +// are undef lowers to a small number of element insertions. +// +// To deal with this, we currently use a bunch of mostly arbitrary heuristics. +// We don't fold shuffles where one side is a non-zero constant, and we don't +// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// non-constant operands. This seems to work out reasonably well in practice. +static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = SVN->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = SVN->getOperand(0); + SDValue N1 = SVN->getOperand(1); + + if (!N0->hasOneUse() || !N1->hasOneUse()) + return SDValue(); + // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as + // discussed above. + if (!N1.isUndef()) { + bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); + bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); + if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) + return SDValue(); + if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) + return SDValue(); + } + + SmallVector<SDValue, 8> Ops; + SmallSet<SDValue, 16> DuplicateOps; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M < (int)NumElts ? M : M - NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + return SDValue(); + } + } + + // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is + // fine, but it's likely to generate low-quality code if the target can't + // reconstruct an appropriate shuffle. + if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) + if (!DuplicateOps.insert(Op).second) + return SDValue(); + + Ops.push_back(Op); + } + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); + return DAG.getBuildVector(VT, SDLoc(SVN), Ops); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -13506,40 +14007,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { - SmallVector<SDValue, 8> Ops; - for (int M : SVN->getMask()) { - SDValue Op = DAG.getUNDEF(VT.getScalarType()); - if (M >= 0) { - int Idx = M % NumElts; - SDValue &S = (M < (int)NumElts ? N0 : N1); - if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { - Op = S.getOperand(Idx); - } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { - if (Idx == 0) - Op = S.getOperand(0); - } else { - // Operand can't be combined - bail out. 
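At the DAG level, the combineShuffleOfScalars helper factored out above performs folds of this shape (letters are placeholder scalars):

    vector_shuffle<0,4,1,5> (BUILD_VECTOR a, b, c, d), (BUILD_VECTOR e, f, g, h)
        --> BUILD_VECTOR a, e, b, f

subject to the heuristics in its comment: both inputs must be single-use, a side that is constant must be all zeros, and non-constant operands may not end up duplicated in the result.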
- break; - } - } - Ops.push_back(Op); - } - if (Ops.size() == VT.getVectorNumElements()) { - // BUILD_VECTOR requires all inputs to be of the same type, find the - // maximum type and extend them all. - EVT SVT = VT.getScalarType(); - if (SVT.isInteger()) - for (SDValue &Op : Ops) - SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); - if (SVT != VT.getScalarType()) - for (SDValue &Op : Ops) - Op = TLI.isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) - : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); - return DAG.getBuildVector(VT, SDLoc(N), Ops); - } - } + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) + return Res; // If this shuffle only has a single input that is a bitcasted shuffle, // attempt to merge the 2 shuffles and suitably bitcast the inputs/output @@ -13647,6 +14117,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + // Don't try to fold splats; they're likely to simplify somehow, or they + // might be free. + if (OtherSV->isSplat()) + return SDValue(); + // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && @@ -13773,10 +14248,20 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // Combine INSERT_SUBVECTORs where we are inserting to the same index. + // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) + // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && + N0.getOperand(1).getValueType() == N1.getValueType() && + N0.getOperand(2) == N2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), + N1, N2); + if (N0.getValueType() != N1.getValueType()) return SDValue(); @@ -13785,7 +14270,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); - EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) @@ -13836,7 +14320,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDLoc dl(N); + SDLoc DL(N); // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. 
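The new INSERT_SUBVECTOR fold above is safe because the outer insert targets the same index with a subvector of the same type, so it completely overwrites whatever the inner insert wrote and SubOld is dead. DAG-level illustration:

    insert_subvector (insert_subvector %vec, %subOld, 0), %subNew, 0
        --> insert_subvector %vec, %subNew, 0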
@@ -13904,8 +14388,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - SDValue Zero = DAG.getConstant(0, dl, ClearVT); - return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + SDValue Zero = DAG.getConstant(0, DL, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, DAG.getBitcast(ClearVT, LHS), Zero, Indices)); }; @@ -14119,6 +14603,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; + if (!RLD->isDereferenceable()) + MMOFlags &= ~MachineMemOperand::MODereferenceable; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { // FIXME: Discards pointer and AA info. Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), @@ -14146,6 +14632,73 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, return false; } +/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and +/// bitwise 'and'. +SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, + SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC) { + // If this is a select where the false operand is zero and the compare is a + // check of the sign bit, see if we can perform the "gzip trick": + // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A + // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); + if (!isNullConstant(N3) || !XType.bitsGE(AType)) + return SDValue(); + + // If the comparison is testing for a positive value, we have to invert + // the sign bit mask, so only do that transform if the target has a bitwise + // 'and not' instruction (the invert is free). + if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { + // (X > -1) ? A : 0 + // (X > 0) ? X : 0 <-- This is canonical signed max. + if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) + return SDValue(); + } else if (CC == ISD::SETLT) { + // (X < 0) ? A : 0 + // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. + if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) + return SDValue(); + } else { + return SDValue(); + } + + // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit + // constant. 
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); + auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { + unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); +} + /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, @@ -14242,48 +14795,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } } - // Check to see if we can perform the "gzip trick", transforming - // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (isNullConstant(N3) && CC == ISD::SETLT && - (isNullConstant(N1) || // (a < 0) ? b : 0 - (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 - EVT XType = N0.getValueType(); - EVT AType = N2.getValueType(); - if (XType.bitsGE(AType)) { - // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a - // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { - unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits() - ShCtV - 1; - SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), - getShiftAmountTy(N0.getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), - XType, N0, ShCt); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorklist(Shift.getNode()); - } - - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); - } - - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), - XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, - SDLoc(N0), - getShiftAmountTy(N0.getValueType()))); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorklist(Shift.getNode()); - } - - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); - } - } + if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) + return V; // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) // where y is has a single bit set. @@ -14511,30 +15024,51 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } +/// Determines the LogBase2 value for a non-null input value using the +/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
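The foldSelectCCToShiftAnd helper above is the "gzip trick" in reusable form. At the source level the basic pattern is the branch-free sign-mask select (sketch for 32-bit integers; the function name is made up):

    #include <cstdint>
    // select_cc setlt X, 0, A, 0  ==>  and (sra X, 31), A   for i32
    int32_t select_if_negative(int32_t x, int32_t a) {
      int32_t mask = x >> 31;   // arithmetic shift: all ones if x < 0, else 0
      return mask & a;          // == (x < 0) ? a : 0
    }
    // When A is a single-bit constant, e.g. 16 == 1 << 4, the combine uses a
    // logical shift instead: ((uint32_t)x >> (31 - 4)) & 16.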
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { + EVT VT = V.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); + SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); + SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); + return LogBase2; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal, we need to find the zero of the function: +/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// => +/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form +/// does not require additional intermediate precision] SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getDivRefinementSteps(VT, MF); + if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { + AddToWorklist(Est.getNode()); - unsigned Iterations = 0; - if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { if (Iterations) { - // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) - // For the reciprocal, we need to find the zero of the function: - // F(X) = A X - 1 [which has a zero at X = 1/A] - // => - // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form - // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - AddToWorklist(Est.getNode()); - // Newton iterations: Est = Est + Est (1 - Arg * Est) - for (unsigned i = 0; i < Iterations; ++i) { + for (int i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); @@ -14656,16 +15190,47 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); - unsigned Iterations = 0; + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. 
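Two of the helpers above in plain arithmetic terms (worked sketch; the compiler builtin is used only for illustration):

    #include <cstdint>
    #include <cassert>

    // BuildLogBase2: LogBase2(V) = (EltBits - 1) - ctlz(V).
    // Example for i32: V = 8, ctlz(8) = 28, and 31 - 28 = 3 = log2(8).
    uint32_t log_base2_i32(uint32_t v) {
      assert(v != 0 && "LogBase2 of zero is undefined");
      return 31 - __builtin_clz(v);
    }

    // One Newton step for the reciprocal 1/A, as in BuildReciprocalEstimate:
    //   x1 = x0 * (2 - A*x0) = x0 + x0*(1 - A*x0)
    // Each step roughly doubles the number of correct bits of the estimate.
    float refine_recip(float A, float x0) {
      return x0 + x0 * (1.0f - A * x0);
    }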
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF); + bool UseOneConstNR = false; - if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + if (SDValue Est = + TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, + Reciprocal)) { AddToWorklist(Est.getNode()); + if (Iterations) { Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + + if (!Reciprocal) { + // Unfortunately, Est is now NaN if the input was exactly 0.0. + // Select out this case and force the answer to 0.0. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + + Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, FPZero, Est); + AddToWorklist(Est.getNode()); + } } return Est; } @@ -14678,23 +15243,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { - SDValue Est = buildSqrtEstimateImpl(Op, Flags, false); - if (!Est) - return SDValue(); - - // Unfortunately, Est is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - EVT VT = Est.getValueType(); - SDLoc DL(Op); - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - - Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, - Zero, Est); - AddToWorklist(Est.getNode()); - return Est; + return buildSqrtEstimateImpl(Op, Flags, false); } /// Return true if base is a frame index, which is known not to alias with @@ -14771,9 +15320,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. 
if (isFrameIndex1 && isFrameIndex2) { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); - Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); + Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index b10da00..e2f33bb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -353,8 +353,8 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) && - std::distance(I, E) > 0 && "Invalid iterator!"); + assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 && + "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -455,17 +455,6 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { return true; } - // Check if the second operand is a constant float. - if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { - unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, CF); - if (ResultReg) { - // We successfully emitted code for the given LLVM Instruction. - updateValueMap(I, ResultReg); - return true; - } - } - unsigned Op1 = getRegForValue(I->getOperand(1)); if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -499,7 +488,7 @@ bool FastISel::selectGetElementPtr(const User *I) { for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); - if (auto *StTy = dyn_cast<StructType>(*GTI)) { + if (StructType *StTy = GTI.getStructTypeOrNull()) { uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -581,7 +570,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); Ops.push_back(MachineOperand::CreateImm(0)); } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { - // Values coming from a stack location also require a sepcial encoding, + // Values coming from a stack location also require a special encoding, // but that is added later on by the target specific frame index // elimination implementation. auto SI = FuncInfo.StaticAllocaMap.find(AI); @@ -666,7 +655,7 @@ bool FastISel::selectStackmap(const CallInst *I) { .addImm(0); // Inform the Frame Information that we have a stackmap in this function. 
- FuncInfo.MF->getFrameInfo()->setHasStackMap(); + FuncInfo.MF->getFrameInfo().setHasStackMap(); return true; } @@ -707,7 +696,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee( const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy, - const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) { + StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) { SmallString<32> MangledName; Mangler::getNameWithPrefix(MangledName, Target, DL); MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName); @@ -845,7 +834,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { CLI.Call->eraseFromParent(); // Inform the Frame Information that we have a patchpoint in this function. - FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); + FuncInfo.MF->getFrameInfo().setHasPatchPoint(); if (CLI.NumResultRegs) updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); @@ -1077,7 +1066,7 @@ bool FastISel::selectCall(const User *I) { } MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); - ComputeUsesVAFloatArgument(*Call, &MMI); + computeUsesVAFloatArgument(*Call, MMI); // Handle intrinsic function calls. if (const auto *II = dyn_cast<IntrinsicInst>(Call)) @@ -1104,6 +1093,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::lifetime_end: // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: + // Neither does the assume intrinsic; it's also OK not to codegen its operand. + case Intrinsic::assume: return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); @@ -1225,6 +1216,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { updateValueMap(II, ResultReg); return true; } + case Intrinsic::invariant_group_barrier: case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) @@ -1324,15 +1316,6 @@ bool FastISel::selectBitCast(const User *I) { return true; } -// Return true if we should copy from swift error to the final vreg as specified -// by SwiftErrorWorklist. -static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI, - FunctionLoweringInfo &FuncInfo) { - if (!TLI.supportSwiftError()) - return false; - return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB); -} - // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1357,10 +1340,6 @@ bool FastISel::selectInstruction(const Instruction *I) { // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(I)) { - // If we need to materialize any vreg from worklist, we bail out of - // FastISel. - if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo)) - return false; if (!handlePHINodesInSuccessorBlocks(I->getParent())) { // PHI node handling may have generated local value instructions, // even though it failed to handle all PHI nodes. @@ -1444,7 +1423,7 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, // fall-through case, which needs no instructions. } else { // The unconditional branch case. 
- TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, + TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector<MachineOperand, 0>(), DbgLoc); } if (FuncInfo.BPI) { @@ -1679,7 +1658,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), - MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), + MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), @@ -1723,18 +1702,6 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, return 0; } -unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/, - const ConstantFP * /*FPImm*/) { - return 0; -} - -unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/, unsigned /*Op1*/, - bool /*Op1IsKill*/, uint64_t /*Imm*/) { - return 0; -} - /// This method is a wrapper of fastEmit_ri. It first tries to emit an /// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and try @@ -2181,6 +2148,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool IsDereferenceable = + I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr; const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; @@ -2195,6 +2164,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOVolatile; if (IsNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (IsDereferenceable) + Flags |= MachineMemOperand::MODereferenceable; if (IsInvariant) Flags |= MachineMemOperand::MOInvariant; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index e669ffc..377a523 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -98,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, Fn->isVarArg(), Outs, Fn->getContext()); // If this personality uses funclets, we need to do a bit more work. - DenseMap<const AllocaInst *, int *> CatchObjects; + DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects; EHPersonality Personality = classifyEHPersonality( Fn->hasPersonalityFn() ? 
Fn->getPersonalityFn() : nullptr); if (isFuncletEHPersonality(Personality)) { @@ -115,7 +114,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { for (WinEHHandlerType &H : TBME.HandlerArray) { if (const AllocaInst *AI = H.CatchObj.Alloca) - CatchObjects.insert({AI, &H.CatchObj.FrameIndex}); + CatchObjects.insert({AI, {}}).first->second.push_back( + &H.CatchObj.FrameIndex); else H.CatchObj.FrameIndex = INT_MAX; } @@ -125,11 +125,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. - Function::const_iterator BB = Fn->begin(), EB = Fn->end(); - for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + for (const BasicBlock &BB : *Fn) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { Type *Ty = AI->getAllocatedType(); unsigned Align = std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), @@ -138,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't // do this if there is an extra alignment requirement. - if (AI->isStaticAlloca() && + if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); @@ -148,18 +146,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, int FrameIndex = INT_MAX; auto Iter = CatchObjects.find(AI); if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { - FrameIndex = MF->getFrameInfo()->CreateFixedObject( + FrameIndex = MF->getFrameInfo().CreateFixedObject( TySize, 0, /*Immutable=*/false, /*isAliased=*/true); - MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align); + MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); } else { FrameIndex = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); } StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. - if (Iter != CatchObjects.end()) - *Iter->second = FrameIndex; + if (Iter != CatchObjects.end()) { + for (int *CatchObjPtr : Iter->second) + *CatchObjPtr = FrameIndex; + } } else { // FIXME: Overaligned static allocas should be grouped into // a single dynamic allocation instead of using a separate @@ -167,20 +167,19 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. - MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI); + MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI); } } // Look for inline asm that clobbers the SP register. 
if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(&*I); + ImmutableCallSite CS(&I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); - for (size_t I = 0, E = Ops.size(); I != E; ++I) { - TargetLowering::AsmOperandInfo &Op = Ops[I]; + for (TargetLowering::AsmOperandInfo &Op : Ops) { if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); @@ -188,7 +187,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, Op.ConstraintVT); if (PhysReg.first == SP) - MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } } } @@ -197,28 +196,28 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for calls to the @llvm.va_start intrinsic. We can omit some // prologue boilerplate for variadic functions that don't examine their // arguments. - if (const auto *II = dyn_cast<IntrinsicInst>(I)) { + if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { if (II->getIntrinsicID() == Intrinsic::vastart) - MF->getFrameInfo()->setHasVAStart(true); + MF->getFrameInfo().setHasVAStart(true); } // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. - if (const auto *CI = dyn_cast<CallInst>(I)) { + if (const auto *CI = dyn_cast<CallInst>(&I)) { if (CI->isMustTailCall() && Fn->isVarArg()) - MF->getFrameInfo()->setHasMustTailInVarArgFunc(true); + MF->getFrameInfo().setHasMustTailInVarArgFunc(true); } // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(&*I)) - if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) - InitializeRegForValue(&*I); + if (isUsedOutsideOfDefiningBlock(&I)) + if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I))) + InitializeRegForValue(&I); // Collect llvm.dbg.declare information. This is done now instead of // during the initial isel pass through the IR so that it is done // in a predictable order. - if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) { assert(DI->getVariable() && "Missing variable"); assert(DI->getDebugLoc() && "Missing location"); if (MMI.hasDebugInfo()) { @@ -234,7 +233,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, StaticAllocaMap.find(AI); if (SI != StaticAllocaMap.end()) { // Check for VLAs. int FI = SI->second; - MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(), + MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(), FI, DI->getDebugLoc()); } } @@ -243,47 +242,52 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. - PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); + PreferredExtendType[&I] = getPreferredExtendForValue(&I); } + } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. 
- for (BB = Fn->begin(); BB != EB; ++BB) { + for (const BasicBlock &BB : *Fn) { // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks // are really data, and no instructions can live here. - if (BB->isEHPad()) { - const Instruction *I = BB->getFirstNonPHI(); + if (BB.isEHPad()) { + const Instruction *PadInst = BB.getFirstNonPHI(); // If this is a non-landingpad EH pad, mark this function as using // funclets. // FIXME: SEH catchpads do not create funclets, so we could avoid setting // this in such cases in order to improve frame layout. - if (!isa<LandingPadInst>(I)) { - MMI.setHasEHFunclets(true); - MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + if (!isa<LandingPadInst>(PadInst)) { + MF->setHasEHFunclets(true); + MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } - if (isa<CatchSwitchInst>(I)) { - assert(&*BB->begin() == I && + if (isa<CatchSwitchInst>(PadInst)) { + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to remove PHIs from imaginary BBs"); continue; } - if (isa<FuncletPadInst>(I)) - assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + if (isa<FuncletPadInst>(PadInst)) + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs"); } - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); - MBBMap[&*BB] = MBB; + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB); + MBBMap[&BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. - if (BB->hasAddressTaken()) + if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + // Mark landing pad blocks. + if (BB.isEHPad()) + MBB->setIsEHPad(); + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. - for (BasicBlock::const_iterator I = BB->begin(); + for (BasicBlock::const_iterator I = BB.begin(); const PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (PN->use_empty()) continue; @@ -297,8 +301,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); - for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - EVT VT = ValueVTs[vti]; + for (EVT VT : ValueVTs) { unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) @@ -308,16 +311,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } - // Mark landing pad blocks. - SmallVector<const LandingPadInst *, 4> LPads; - for (BB = Fn->begin(); BB != EB; ++BB) { - const Instruction *FNP = BB->getFirstNonPHI(); - if (BB->isEHPad() && MBBMap.count(&*BB)) - MBBMap[&*BB]->setIsEHPad(); - if (const auto *LPI = dyn_cast<LandingPadInst>(FNP)) - LPads.push_back(LPI); - } - if (!isFuncletEHPersonality(Personality)) return; @@ -541,75 +534,26 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( return VReg; } -/// ComputeUsesVAFloatArgument - Determine if any floating-point values are -/// being passed to this variadic function, and set the MachineModuleInfo's -/// usesVAFloatArgument flag if so. This flag is used to emit an undefined -/// reference to _fltused on Windows, which will link in MSVCRT's -/// floating-point support. 
-void llvm::ComputeUsesVAFloatArgument(const CallInst &I, - MachineModuleInfo *MMI) -{ - FunctionType *FT = cast<FunctionType>( - I.getCalledValue()->getType()->getContainedType(0)); - if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (auto i : post_order(T)) { - if (i->isFloatingPointTy()) { - MMI->setUsesVAFloatArgument(true); - return; - } - } - } - } -} - -/// AddLandingPadInfo - Extract the exception handling information from the -/// landingpad instruction and add them to the specified machine module info. -void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, - MachineBasicBlock *MBB) { - if (const auto *PF = dyn_cast<Function>( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) - MMI.addPersonality(PF); - - if (I.isCleanup()) - MMI.addCleanup(MBB); - - // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, - // but we need to do it this way because of how the DWARF EH emitter - // processes the clauses. - for (unsigned i = I.getNumClauses(); i != 0; --i) { - Value *Val = I.getClause(i - 1); - if (I.isCatch(i - 1)) { - MMI.addCatchTypeInfo(MBB, - dyn_cast<GlobalValue>(Val->stripPointerCasts())); - } else { - // Add filters in a list. - Constant *CVal = cast<Constant>(Val); - SmallVector<const GlobalValue*, 4> FilterList; - for (User::op_iterator - II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) - FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); - - MMI.addFilterTypeInfo(MBB, FilterList); - } - } -} - -unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB, - const Value* Val) const { - // Find the index in SwiftErrorVals. - SwiftErrorValues::const_iterator I = - std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); - assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); - return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()]; +unsigned +FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB, + const Value *Val) { + auto Key = std::make_pair(MBB, Val); + auto It = SwiftErrorVRegDefMap.find(Key); + // If this is the first use of this swifterror value in this basic block, + // create a new virtual register. + // After we processed all basic blocks we will satisfy this "upwards exposed + // use" by inserting a copy or phi at the beginning of this block. + if (It == SwiftErrorVRegDefMap.end()) { + auto &DL = MF->getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + auto VReg = MF->getRegInfo().createVirtualRegister(RC); + SwiftErrorVRegDefMap[Key] = VReg; + SwiftErrorVRegUpwardsUse[Key] = VReg; + return VReg; + } else return It->second; } -void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB, - const Value* Val, unsigned VReg) { - // Find the index in SwiftErrorVals. 
- SwiftErrorValues::iterator I = - std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); - assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); - SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg; +void FunctionLoweringInfo::setCurrentSwiftErrorVReg( + const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { + SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c8af73a..4a9042c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -330,16 +330,24 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, // shrink VReg's register class within reason. For example, if VReg == GR32 // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. if (II) { - const TargetRegisterClass *DstRC = nullptr; + const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) - DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); - assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) && - "Expected VReg"); - if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { - unsigned NewVReg = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); - VReg = NewVReg; + OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF); + + if (OpRC) { + const TargetRegisterClass *ConstrainedRC + = MRI->constrainRegClass(VReg, OpRC, MinRCSize); + if (!ConstrainedRC) { + OpRC = TRI->getAllocatableClass(OpRC); + assert(OpRC && "Constraints cannot be fulfilled for allocation"); + unsigned NewVReg = MRI->createVirtualRegister(OpRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } else { + assert(ConstrainedRC->isAllocatable() && + "Constraining an allocatable VReg produced an unallocatable class?"); + } } } @@ -494,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); - unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned Reg; + MachineInstr *DefMI; + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); + if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Reg = R->getReg(); + DefMI = nullptr; + } else { + Reg = getVR(Node->getOperand(0), VRBaseMap); + DefMI = MRI->getVRegDef(Reg); + } + unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && @@ -511,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); MRI->clearKillFlags(SrcReg); } else { - // VReg may not support a SubIdx sub-register, and we may need to + // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. - VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. 
- BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 18ad910..b002825 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -259,19 +259,25 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { (VT == MVT::f64) ? MVT::i64 : MVT::i32); } + APFloat APF = CFP->getValueAPF(); EVT OrigVT = VT; EVT SVT = VT; - while (SVT != MVT::f32 && SVT != MVT::f16) { - SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); - if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && - // Only do this if the target has a native EXTLOAD instruction from - // smaller type. - TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && - TLI.ShouldShrinkFPConstant(OrigVT)) { - Type *SType = SVT.getTypeForEVT(*DAG.getContext()); - LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); - VT = SVT; - Extend = true; + + // We don't want to shrink SNaNs. Converting the SNaN back to its real type + // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ). + if (!APF.isSignaling()) { + while (SVT != MVT::f32 && SVT != MVT::f16) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); + if (ConstantFPSDNode::isValueValidForType(SVT, APF) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. + TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + Type *SType = SVT.getTypeForEVT(*DAG.getContext()); + LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } } } @@ -324,8 +330,6 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, // supported by the target. EVT VT = Tmp1.getValueType(); EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = Tmp3.getValueType(); - EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -335,13 +339,8 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, DAG.getEntryNode(), dl, Tmp1, StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); - // Truncate or zero extend offset to target pointer type. - Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); - // Add the offset to the index. - unsigned EltSize = EltVT.getSizeInBits()/8; - Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, - DAG.getConstant(EltSize, dl, IdxVT)); - SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3); + // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); // Load the updated vector. 
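As a hedged host-side probe of the rationale above for skipping signaling NaNs (plain C++20, not part of this patch): narrowing an SNaN constant to f32 and widening it again may come back with the quiet bit set. The outcome depends on the host FPU and on how the compiler folds the conversions, so the program only reports what the host does.

#include <bit>
#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  double SNaN = std::numeric_limits<double>::signaling_NaN();
  double RoundTripped = static_cast<double>(static_cast<float>(SNaN));
  uint64_t Bits = std::bit_cast<uint64_t>(RoundTripped);
  bool QuietBit = (Bits >> 51) & 1;    // IEEE-754 binary64 quiet-NaN bit
  std::printf("quiet bit after f64 -> f32 -> f64: %d\n", int(QuietBit));
  return 0;
}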
@@ -795,7 +794,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; - // FALLTHROUGH + LLVM_FALLTHROUGH; case TargetLowering::Legal: { Value = SDValue(Node, 0); Chain = SDValue(Node, 1); @@ -1013,6 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: + case ISD::ADDROFRETURNADDR: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1061,35 +1061,41 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SRL: case ISD::SRA: case ISD::ROTL: - case ISD::ROTR: + case ISD::ROTR: { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Node->getOperand(1).getValueType().isVector()) { - SDValue SAO = - DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), - Node->getOperand(1)); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), - Handle.getValue()); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!Op1.getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1); + // The getShiftAmountOperand() may create a new operand node or + // return the existing one. If new operand is created we need + // to update the parent node. + // Do not try to legalize SAO here! It will be automatically legalized + // in the next round. + if (SAO != Op1) + NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO); } - break; + } + break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: - case ISD::SHL_PARTS: + case ISD::SHL_PARTS: { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Node->getOperand(2).getValueType().isVector()) { - SDValue SAO = - DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), - Node->getOperand(2)); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), - Node->getOperand(1), - Handle.getValue()); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); + if (!Op2.getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2); + // The getShiftAmountOperand() may create a new operand node or + // return the existing one. If new operand is created we need + // to update the parent node. + if (SAO != Op2) + NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO); } - break; + } + break; } if (NewNode != Node) { @@ -1118,12 +1124,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ReplaceNode(Node, ResultVals.data()); return; } + LLVM_FALLTHROUGH; } - // FALL THROUGH case TargetLowering::Expand: if (ExpandNode(Node)) return; - // FALL THROUGH + LLVM_FALLTHROUGH; case TargetLowering::LibCall: ConvertNodeToLibcall(Node); return; @@ -1196,21 +1202,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { } } + EVT VecVT = Vec.getValueType(); + if (!Ch.getNode()) { // Store the value to a temporary stack slot, then LOAD the returned part. 
- StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + StackPtr = DAG.CreateStackTemporary(VecVT); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); } - // Add the offset to the index. - unsigned EltSize = - Vec.getValueType().getVectorElementType().getSizeInBits()/8; - Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); - StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); SDValue NewLoad; @@ -1220,7 +1221,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { else NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), - Vec.getValueType().getVectorElementType()); + VecVT.getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1244,8 +1245,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. - - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + EVT VecVT = Vec.getValueType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); @@ -1254,17 +1255,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. - - // Add the offset to the index. - unsigned EltSize = - Vec.getValueType().getVectorElementType().getSizeInBits()/8; - - Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); - - SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - StackPtr); + SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); // Store the subvector. Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); @@ -1593,6 +1584,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, break; } // Fallthrough if we are unsigned integer. + LLVM_FALLTHROUGH; case ISD::SETLE: case ISD::SETGT: case ISD::SETGE: @@ -1650,7 +1642,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); + unsigned SrcSize = SrcOp.getValueSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); @@ -2521,13 +2513,56 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } -/// Open code the operations for BITREVERSE. +/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. 
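A standalone scalar illustration of the mask + shift pattern named above (plain C++, not part of this patch): byte-swap first, then swap nibbles, bit pairs, and finally single bits within every byte, using the same 0xF0/0x0F, 0xCC/0x33 and 0xAA/0x55 masks repeated across the word.

#include <cassert>
#include <cstdint>

static uint32_t reverseBits(uint32_t V) {
  // BSWAP
  V = (V >> 24) | ((V >> 8) & 0x0000FF00u) | ((V << 8) & 0x00FF0000u) | (V << 24);
  V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4); // swap i4
  V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2); // swap i2
  V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1); // swap i1
  return V;
}

int main() {
  assert(reverseBits(0x00000001u) == 0x80000000u);
  assert(reverseBits(0x12345678u) == 0x1E6A2C48u);
  return 0;
}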
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); - SDValue Tmp, Tmp2; + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0); + APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0); + for (unsigned J = 0; J != Sz; J += 8) { + MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J)); + MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J)); + MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J)); + MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J)); + MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J)); + MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J)); + } + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); + + // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + return Tmp; + } + Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { if (I < J) @@ -2551,7 +2586,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.getSimpleVT().getScalarType().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); @@ -2780,10 +2815,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + cast<AtomicSDNode>(Node)->getMemOperand()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; @@ -2794,9 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { 
cast<AtomicSDNode>(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + cast<AtomicSDNode>(Node)->getMemOperand()); Results.push_back(Swap.getValue(1)); break; } @@ -2808,10 +2838,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Res = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), - Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getSuccessOrdering(), - cast<AtomicSDNode>(Node)->getFailureOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand()); SDValue ExtRes = Res; SDValue LHS = Res; @@ -2879,15 +2906,32 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::SIGN_EXTEND_INREG: { - // NOTE: we could fall back on load/store here too for targets without - // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); + + // An in-register sign-extend of a boolean is a negation: + // 'true' (1) sign-extended is -1. + // 'false' (0) sign-extended is 0. + // However, we must mask the high bits of the source operand because the + // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero. + + // TODO: Do this for vectors too? + if (ExtraVT.getSizeInBits() == 1) { + SDValue One = DAG.getConstant(1, dl, VT); + SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And); + Results.push_back(Neg); + break; + } + + // NOTE: we could fall back on load/store here too for targets without + // SRA. However, it is doubtful that any exist. EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); if (VT.isVector()) ShiftAmountTy = VT; - unsigned BitsDiff = VT.getScalarType().getSizeInBits() - - ExtraVT.getScalarType().getSizeInBits(); + unsigned BitsDiff = VT.getScalarSizeInBits() - + ExtraVT.getScalarSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); @@ -3248,17 +3292,49 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::MULHU: case ISD::MULHS: { - unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : - ISD::SMUL_LOHI; + unsigned ExpandOpcode = + Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && - "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), Node->getOperand(1)); Results.push_back(Tmp1.getValue(1)); break; } + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + MVT VT = LHS.getSimpleValueType(); + unsigned MULHOpcode = + Node->getOpcode() == ISD::UMUL_LOHI ? 
ISD::MULHU : ISD::MULHS; + + if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) { + Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS)); + Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS)); + break; + } + + SmallVector<SDValue, 4> Halves; + EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext()); + assert(TLI.isTypeLegal(HalfType)); + if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves, + HalfType, DAG, + TargetLowering::MulExpansionKind::Always)) { + for (unsigned i = 0; i < 2; ++i) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]); + SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]); + SDValue Shift = DAG.getConstant( + HalfType.getScalarSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + } + break; + } + break; + } case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3293,7 +3369,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && TLI.isOperationLegalOrCustom(ISD::SHL, VT) && TLI.isOperationLegalOrCustom(ISD::OR, VT) && - TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG, + TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); SDValue Shift = @@ -3416,8 +3493,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // pre-lowered to the correct types. This all depends upon WideVT not // being a legal type for the architecture and thus has to be split to // two arguments. - SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + SDValue Ret; + if(DAG.getDataLayout().isLittleEndian()) { + // Halves of WideVT are packed into registers in different order + // depending on platform endianness. This is usually handled by + // the C calling convention, but we can't defer to it in + // the legalizer. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + } else { + SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; + Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + } BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, @@ -3441,6 +3528,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, DAG.getConstant(0, dl, VT), ISD::SETNE); } + + // Truncate the result if SetCC returns a larger type than needed. 
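A quick standalone check of the half recombination used in the UMUL_LOHI/SMUL_LOHI expansion above (plain C++, not part of this patch): the wide product is the zero-extended low half OR'ed with the high half shifted left by the half width.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEADBEEFu, B = 0x12345678u;
  uint64_t Wide = uint64_t(A) * uint64_t(B); // the full 64-bit product
  uint32_t Lo = uint32_t(Wide);              // low half (UMUL_LOHI lo)
  uint32_t Hi = uint32_t(Wide >> 32);        // high half (UMUL_LOHI hi / MULHU)
  uint64_t Recombined = uint64_t(Lo) | (uint64_t(Hi) << 32);
  assert(Recombined == Wide);
  return 0;
}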
+ EVT RType = Node->getValueType(1); + if (RType.getSizeInBits() < TopHalf.getValueSizeInBits()) + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf); + + assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() && + "Unexpected result type for S/UMULO legalization"); + Results.push_back(BottomHalf); Results.push_back(TopHalf); break; @@ -3476,9 +3572,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); - const DataLayout &TD = DAG.getDataLayout(); + EVT PTy = TLI.getPointerTy(TD); + unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); @@ -3492,7 +3588,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Addr = LD; - if (TM.isPositionIndependent()) { + if (TLI.isJumpTableRelative()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -4019,10 +4115,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; + case ISD::BITREVERSE: case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Tmp1 = DAG.getNode( ISD::SRL, dl, NVT, Tmp1, DAG.getConstant(DiffBits, dl, @@ -4073,6 +4170,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplacedNode(Node); break; } + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -4082,7 +4183,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "Cannot promote logic operation"); - ExtOp = ISD::ANY_EXTEND; + + switch (Node->getOpcode()) { + default: + ExtOp = ISD::ANY_EXTEND; + break; + case ISD::SDIV: + case ISD::SREM: + ExtOp = ISD::SIGN_EXTEND; + break; + case ISD::UDIV: + case ISD::UREM: + ExtOp = ISD::ZERO_EXTEND; + break; + } TruncOp = ISD::TRUNCATE; } // Promote each of the values to the new type. @@ -4093,6 +4207,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); break; } + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: { + // Promote to a multiply in a wider integer type. + unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2); + + auto &DL = DAG.getDataLayout(); + unsigned OriginalSize = OVT.getScalarSizeInBits(); + Tmp2 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT))); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2)); + break; + } case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector() || @@ -4351,7 +4483,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); - // Handle bitcasts to different vector type with the smae total bit size. 
+ // Handle bitcasts to different vector type with the same total bit size. // // e.g. v2i64 = scalar_to_vector x:i64 // => diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 31ebf7b..72b56d8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -632,7 +632,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { SDLoc dl(N); auto MMOFlags = - L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + L->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, @@ -1465,7 +1466,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, // TODO: Are there fast-math-flags to propagate to this FADD? Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(), APInt(128, Parts)), dl, MVT::ppcf128)); Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), @@ -1630,7 +1631,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); + APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. @@ -2085,7 +2086,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. 
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); auto MMOFlags = - L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + L->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9a18943..dc436ce 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -57,8 +57,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; - case ISD::CONVERT_RNDSAT: - Res = PromoteIntRes_CONVERT_RNDSAT(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; @@ -102,6 +100,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break; + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; @@ -183,8 +186,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), - N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); + N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -196,8 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); + Op2, N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -220,8 +221,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, SDValue Res = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), - N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), - N->getSynchScope()); + N->getMemOperand()); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); return Res.getValue(1); @@ -233,8 +233,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), - N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), - N->getFailureOrdering(), N->getSynchScope()); + N->getBasePtr(), Op2, Op3, N->getMemOperand()); // Update the use to N with the newly created Res. 
for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) ReplaceValueWith(SDValue(N, i), Res.getValue(i)); @@ -353,18 +352,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { return Result; } -SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || - CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || - CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && - "can only promote integers"); - EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), - N->getOperand(1), N->getOperand(2), - N->getOperand(3), N->getOperand(4), CvtCode); -} - SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); @@ -427,6 +414,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. + // + // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: + // before legalization: fp-to-uint16, 65534. -> 0xfffe + // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); @@ -507,7 +498,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex()}; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -882,8 +873,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::CONVERT_RNDSAT: - Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: @@ -946,14 +935,16 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, SDValue OpL = GetPromotedInteger(NewLHS); SDValue OpR = GetPromotedInteger(NewRHS); - // We would prefer to promote the comparison operand with sign extension, - // if we find the operand is actually to truncate an AssertSext. With this - // optimization, we can avoid inserting real truncate instruction, which - // is redudant eventually. - if (OpL->getOpcode() == ISD::AssertSext && - cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() && - OpR->getOpcode() == ISD::AssertSext && - cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) { + // We would prefer to promote the comparison operand with sign extension. + // If the width of OpL/OpR excluding the duplicated sign bits is no greater + // than the width of NewLHS/NewRH, we can avoid inserting real truncate + // instruction, which is redudant eventually. 
+ unsigned OpLEffectiveBits = + OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; + unsigned OpREffectiveBits = + OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() && + OpREffectiveBits <= NewRHS.getValueSizeInBits()) { NewLHS = OpL; NewRHS = OpR; } else { @@ -990,8 +981,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), - N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), - N->getOrdering(), N->getSynchScope()); + N->getChain(), N->getBasePtr(), Op2, N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { @@ -1051,8 +1041,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be // truncated away. - assert(N->getOperand(0).getValueType().getSizeInBits() >= - N->getValueType(0).getVectorElementType().getSizeInBits() && + assert(N->getOperand(0).getValueSizeInBits() >= + N->getValueType(0).getScalarSizeInBits() && "Type of inserted value narrower than vector element type!"); SmallVector<SDValue, 16> NewOps; @@ -1062,18 +1052,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || - CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || - CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && - "can only promote integer arguments"); - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, - N->getOperand(1), N->getOperand(2), - N->getOperand(3), N->getOperand(4), CvtCode); -} - SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo) { if (OpNo == 1) { @@ -1081,8 +1059,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, // have to match the vector element type. // Check that any extra bits introduced will be truncated away. - assert(N->getOperand(1).getValueType().getSizeInBits() >= - N->getValueType(0).getVectorElementType().getSizeInBits() && + assert(N->getOperand(1).getValueSizeInBits() >= + N->getValueType(0).getScalarSizeInBits() && "Type of inserted value narrower than vector element type!"); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), GetPromotedInteger(N->getOperand(1)), @@ -1210,7 +1188,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), - TruncateStore); + TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, @@ -1233,7 +1211,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + + SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + // updated in place. 
+ if (Res.getNode() == N) + return Res; + + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, @@ -1314,6 +1300,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; @@ -1352,8 +1339,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { SDValue Tmp = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), - AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), - AN->getSynchScope()); + AN->getMemOperand()); // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine // success simply by comparing the loaded value against the ingoing @@ -1508,8 +1494,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT ShTy = Amt.getValueType(); - unsigned ShBits = ShTy.getScalarType().getSizeInBits(); - unsigned NVTBits = NVT.getScalarType().getSizeInBits(); + unsigned ShBits = ShTy.getScalarSizeInBits(); + unsigned NVTBits = NVT.getScalarSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); SDLoc dl(N); @@ -1700,7 +1686,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, EVT CCT = getSetCCResultType(NVT); // Hi part is always the same op - Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH}); + Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); // We need to know whether to select Lo part that corresponds to 'winning' // Hi part or if Hi parts are equal. 
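The ExpandIntRes_MINMAX hunks above and below split a double-width min/max into native-width halves: the high half reuses the original signed or unsigned opcode, and the low half is either the low half of whichever side won the high-part comparison or, when the high parts tie, an unsigned min/max of the two low halves. A standalone sketch of that decomposition for a 64-bit signed max built from 32-bit pieces; the names and the plain C++ are illustrative, not SelectionDAG nodes:

#include <algorithm>
#include <cstdint>

int64_t smax64_expanded(int64_t A, int64_t B) {
  int32_t AH = (int32_t)(A >> 32), BH = (int32_t)(B >> 32);
  uint32_t AL = (uint32_t)A, BL = (uint32_t)B;

  int32_t Hi = std::max(AH, BH);          // hi part: same (signed) op
  uint32_t LoCmp = (AH > BH) ? AL : BL;   // lo from the side that won on hi
  uint32_t LoMinMax = std::max(AL, BL);   // unsigned op when the hi parts tie
  uint32_t Lo = (AH == BH) ? LoMinMax : LoCmp;

  // Reassemble the halves (two's-complement wrap assumed, as on all targets).
  return (int64_t)(((uint64_t)(uint32_t)Hi << 32) | Lo);
}

The select on IsHiEq in the hunk below corresponds to the final ternary here.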
@@ -1711,7 +1697,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); // Recursed Lo part if Hi parts are equal, this uses unsigned version - SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL}); + SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL}); Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); } @@ -1774,7 +1760,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); - // Fallthrough + LLVM_FALLTHROUGH; case TargetLoweringBase::ZeroOrOneBooleanContent: Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); break; @@ -2002,6 +1988,19 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, Hi = DAG.getConstant(0, dl, NVT); } +void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); + // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); +} + void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2065,7 +2064,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (ExtType == ISD::SEXTLOAD) { // The high part is obtained by SRA'ing all but one of the bits of the // lo part. - unsigned LoSize = Lo.getValueType().getSizeInBits(); + unsigned LoSize = Lo.getValueSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); @@ -2166,7 +2165,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, GetExpandedInteger(N->getOperand(0), LL, LH); GetExpandedInteger(N->getOperand(1), RL, RH); - if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH)) + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, + TargetLowering::MulExpansionKind::OnlyLegalOrCustom, + LL, LH, RL, RH)) return; // If nothing else, we can make a libcall. @@ -2180,7 +2181,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::MUL_I128; - if (LC == RTLIB::UNKNOWN_LIBCALL) { + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { // We'll expand the multiplication by brute force because we have no other // options. This is a trivially-generalized version of the code from // Hacker's Delight (itself derived from Knuth's Algorithm M from section @@ -2354,8 +2355,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // the new SHL_PARTS operation would need further legalization. SDValue ShiftOp = N->getOperand(1); EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - assert(ShiftTy.getScalarType().getSizeInBits() >= - Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && + assert(ShiftTy.getScalarSizeInBits() >= + Log2_32_Ceil(VT.getScalarSizeInBits()) && "ShiftAmountTy is too small to cover the range of this type!"); if (ShiftOp.getValueType() != ShiftTy) ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); @@ -2436,8 +2437,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, "Operand over promoted?"); // Split the promoted operand. This will simplify when it is expanded. 
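Earlier in this group of hunks, the new ExpandIntRes_FLT_ROUNDS materialises only the low half and derives the high half with an arithmetic shift, because -1 is a legal FLT_ROUNDS value and must be sign- rather than zero-extended. A plain-C++ sketch of that split, assuming an arithmetic right shift on signed values; the helper is hypothetical, not LLVM code:

#include <cstdint>

void expandFltRounds(int32_t FltRounds, int32_t &Lo, int32_t &Hi) {
  Lo = FltRounds;  // the i32 FLT_ROUNDS_ value
  Hi = Lo >> 31;   // shifting by (bit width - 1) replicates the sign bit
}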
SplitInteger(Res, Lo, Hi); - unsigned ExcessBits = - Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); @@ -2458,13 +2458,12 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // The high part gets the sign extension from the lo-part. This handles // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, - DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, + DAG.getConstant(Hi.getValueSizeInBits() - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); } else { // For example, extension of an i48 to an i64. Leave the low part alone, // sext_inreg the high part. - unsigned ExcessBits = - EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); @@ -2690,8 +2689,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, "Operand over promoted?"); // Split the promoted operand. This will simplify when it is expanded. SplitInteger(Res, Lo, Hi); - unsigned ExcessBits = - Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); @@ -2707,10 +2705,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), - N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand()); ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); @@ -2833,51 +2828,51 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, case ISD::SETUGE: LowCC = ISD::SETUGE; break; } - // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison - // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands - // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2; + // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison + // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp; // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, nullptr); - SDValue Tmp1, Tmp2; + SDValue LoCmp, HiCmp; if (TLI.isTypeLegal(LHSLo.getValueType()) && TLI.isTypeLegal(RHSLo.getValueType())) - Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); - if (!Tmp1.getNode()) - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC); + LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, LowCC, false, DagCombineInfo, dl); + if (!LoCmp.getNode()) + LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, LowCC); if (TLI.isTypeLegal(LHSHi.getValueType()) && TLI.isTypeLegal(RHSHi.getValueType())) - Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); - if (!Tmp2.getNode()) - Tmp2 = DAG.getNode(ISD::SETCC, dl, - getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, DAG.getCondCode(CCCode)); - - ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); - ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode()); - if ((Tmp1C && Tmp1C->isNullValue()) || - (Tmp2C && Tmp2C->isNullValue() && - (CCCode == ISD::SETLE || CCCode == ISD::SETGE || - CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) || - (Tmp2C && Tmp2C->getAPIntValue() == 1 && - (CCCode == ISD::SETLT || CCCode == ISD::SETGT || - CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) { - // low part is known false, returns high part. + HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, + RHSHi, CCCode, false, DagCombineInfo, dl); + if (!HiCmp.getNode()) + HiCmp = + DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, DAG.getCondCode(CCCode)); + + ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode()); + ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode()); + + bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE); + + if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) || + (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) || + (LoCmpC && LoCmpC->isNullValue())))) { // For LE / GE, if high part is known false, ignore the low part. - // For LT / GT, if high part is known true, ignore the low part. - NewLHS = Tmp2; + // For LT / GT: if low part is known false, return the high part. + // if high part is known true, ignore the low part. + NewLHS = HiCmp; NewRHS = SDValue(); return; } if (LHSHi == RHSHi) { // Comparing the low bits is enough. 
- NewLHS = Tmp1; + NewLHS = LoCmp; NewRHS = SDValue(); return; } @@ -2922,8 +2917,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, if (!NewLHS.getNode()) NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), + NewLHS, LoCmp, HiCmp); NewRHS = SDValue(); } @@ -3198,9 +3193,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { cast<AtomicSDNode>(N)->getMemoryVT(), N->getOperand(0), N->getOperand(1), N->getOperand(2), - cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + cast<AtomicSDNode>(N)->getMemOperand()); return Swap.getValue(1); } @@ -3327,6 +3320,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } +SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(NVT.isVector() && "This type must be promoted to a vector type"); + + SDLoc dl(N); + + // For operands whose TypeAction is to promote, extend the promoted node + // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion + // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to + // type.. + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue Promoted; + + switch(N->getOpcode()) { + case ISD::SIGN_EXTEND_VECTOR_INREG: + Promoted = SExtPromotedInteger(N->getOperand(0)); + break; + case ISD::ZERO_EXTEND_VECTOR_INREG: + Promoted = ZExtPromotedInteger(N->getOperand(0)); + break; + case ISD::ANY_EXTEND_VECTOR_INREG: + Promoted = GetPromotedInteger(N->getOperand(0)); + break; + default: + llvm_unreachable("Node has unexpected Opcode"); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Promoted); + } + + // Directly extend to the appropriate transform-to type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 144bed2..cf19d75 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -117,6 +117,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { Mapped |= 64; if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; + if (PromotedFloats.find(Res) != PromotedFloats.end()) + Mapped |= 256; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -159,6 +161,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << " SplitVectors"; if (Mapped & 128) dbgs() << " WidenedVectors"; + if (Mapped & 256) + dbgs() << " PromotedFloats"; dbgs() << "\n"; llvm_unreachable(nullptr); } @@ -484,7 +488,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // updated after all operands have been analyzed. Since this is rare, // the code tries to minimize overhead in the non-morphing case. 
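The IntegerExpandSetCCOperands renaming above (Tmp1/Tmp2 becoming LoCmp/HiCmp) keeps the long-standing recipe: compare the low halves unsigned, compare the high halves with the original predicate, and pick between the two results depending on whether the high halves are equal. A scalar sketch of that recipe for a signed 64-bit less-than assembled from 32-bit pieces, illustrative only:

#include <cstdint>

// dest = (hi(a) == hi(b)) ? LoCmp : HiCmp, with LoCmp always unsigned.
bool slt64_expanded(int64_t A, int64_t B) {
  int32_t AH = (int32_t)(A >> 32), BH = (int32_t)(B >> 32);
  uint32_t AL = (uint32_t)A, BL = (uint32_t)B;

  bool LoCmp = AL < BL; // low halves: always an unsigned comparison
  bool HiCmp = AH < BH; // high halves: signedness follows the predicate
  return (AH == BH) ? LoCmp : HiCmp;
}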
- SmallVector<SDValue, 8> NewOps; + std::vector<SDValue> NewOps; unsigned NumProcessed = 0; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue OrigOp = N->getOperand(i); @@ -500,7 +504,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - NewOps.append(N->op_begin(), N->op_begin() + i); + NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } @@ -794,8 +798,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. For example BUILD_VECTOR of type <1 x i1> with // a constant i8 operand. - assert(Result.getValueType().getSizeInBits() >= - Op.getValueType().getVectorElementType().getSizeInBits() && + assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -905,7 +908,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { /// Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { - unsigned BitWidth = Op.getValueType().getSizeInBits(); + unsigned BitWidth = Op.getValueSizeInBits(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -913,7 +916,7 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { /// Convert to a vector of integers of the same size. SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); - unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + unsigned EltWidth = Op.getScalarValueSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), @@ -1018,22 +1021,6 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, DAG.getIntPtrConstant(1, dl)); } -SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, - SDValue Index) { - SDLoc dl(Index); - // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout())); - - // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltVT.getSizeInBits() && - "Converting bits to bytes lost precision"); - - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EltSize, dl, Index.getValueType())); - return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); -} - /// Build an integer with low bits Lo and high bits Hi. SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result SDLoc @@ -1145,7 +1132,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { SDLoc dl(Op); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == - Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Op.getValueSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, DAG.getConstant(LoVT.getSizeInBits(), dl, @@ -1157,8 +1144,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// size of Op's. 
void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), - Op.getValueType().getSizeInBits()/2); + EVT HalfVT = + EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 84ad8f8..ec55662 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -173,7 +173,6 @@ private: /// input operand is returned. SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); @@ -242,6 +241,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); @@ -249,7 +249,6 @@ private: SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); - SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); SDValue PromoteIntRes_CTPOP(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); @@ -288,7 +287,6 @@ private: SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); - SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -339,6 +337,7 @@ private: void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -601,7 +600,6 @@ private: SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -707,7 +705,6 @@ private: SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); - SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 665180e..3682c32 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -141,11 +141,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { if 
(DAG.getDataLayout().isBigEndian()) std::swap(LHS, RHS); - Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, - EVT::getIntegerVT( - *DAG.getContext(), - LHS.getValueType().getSizeInBits() << 1), - LHS, RHS)); + Vals.push_back(DAG.getNode( + ISD::BUILD_PAIR, dl, + EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1), + LHS, RHS)); } Lo = Vals[Slot++]; Hi = Vals[Slot++]; @@ -337,7 +336,8 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SDLoc dl(N); - if (N->getValueType(0).isVector()) { + if (N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isInteger()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3c9cb17..d4fa20f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -106,7 +106,8 @@ class VectorLegalizer { SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); + SDValue ExpandCTLZ(SDValue Op); + SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -332,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -362,7 +365,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Result = Tmp1; break; } - // FALL THROUGH + LLVM_FALLTHROUGH; } case TargetLowering::Expand: Result = Expand(Op); @@ -693,9 +696,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return UnrollVSETCC(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); + case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); + return ExpandCTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -770,8 +775,8 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { SDLoc DL(Op); EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); - unsigned BW = VT.getScalarType().getSizeInBits(); - unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + unsigned BW = VT.getScalarSizeInBits(); + unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); @@ -817,8 +822,8 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized // without full scalarization than the sign extension does. 
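ExpandSIGN_EXTEND_VECTOR_INREG falls back to a shift pair: the narrow source bits are moved to the top of each lane and then arithmetic-shifted back down, which sign-extends them without scalarizing; the widened shift amounts appear just below. A per-lane scalar sketch, assuming 32-bit lanes and an arithmetic right shift:

#include <cstdint>

// Sign-extend the low SrcBits of X within a 32-bit lane via SHL + SRA.
// Assumes 0 < SrcBits <= 32.
int32_t signExtendInReg32(uint32_t X, unsigned SrcBits) {
  unsigned Shift = 32 - SrcBits;
  return (int32_t)(X << Shift) >> Shift;
}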
- unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); - unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + unsigned EltWidth = VT.getScalarSizeInBits(); + unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), @@ -951,7 +956,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. - if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + if (VT.getSizeInBits() != Op1.getValueSizeInBits()) return DAG.UnrollVectorOp(Op.getNode()); // Bitcast the operands to be the same type as the mask. @@ -961,7 +966,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); + APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -979,21 +984,20 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); - EVT SVT = VT.getScalarType(); - assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && - "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + unsigned BW = VT.getScalarSizeInBits(); + assert((BW == 64 || BW == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); - unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); + SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. - uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1010,7 +1014,6 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); } - SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); @@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } -SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. 
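The new ExpandCTLZ that begins here first tries the plain CTLZ opcode and otherwise falls back to the CTPOP-based method spelled out in the hunk that follows: smear the leading one across all lower bits, then popcount the complement. A scalar sketch of that identity for 16-bit elements, with a compiler builtin standing in for the CTPOP node:

#include <cstdint>

unsigned ctlz16(uint16_t X) {
  X |= X >> 1;  // after these ORs every bit at or below the leading one is set
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  return __builtin_popcount((uint16_t)~X); // complement holds the leading zeros
}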
+ if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { + SDLoc DL(Op); + return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); + } + + // If CTPOP is available we can lower with a CTPOP based method: + // u16 ctlz(u16 x) { + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // return ctpop(~x); + // } + // Ref: "Hacker's Delight" by Henry Warren + if (isPowerOf2_32(NumBitsPerElt) && + TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { + SDLoc DL(Op); + SDValue Res = Op.getOperand(0); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + for (unsigned i = 1; i != NumBitsPerElt; i *= 2) + Res = DAG.getNode( + ISD::OR, DL, VT, Res, + DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); + + Res = DAG.getNOT(DL, Res, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, Res); + } + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { // If the non-ZERO_UNDEF version is supported we can use that instead. - unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; - if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) { + if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { SDLoc DL(Op); - return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0)); + return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); } // Otherwise go ahead and unroll. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f3adca4..6906f67 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -51,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; - case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -179,17 +178,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { return InOp; } -SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, SDLoc(N), - Op0, DAG.getValueType(NewVT), - DAG.getValueType(Op0.getValueType()), - N->getOperand(3), - N->getOperand(4), - cast<CvtRndSatSDNode>(N)->getCvtCode()); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0).getVectorElementType(), @@ -621,7 +609,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITREVERSE: case ISD::BSWAP: - case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -846,7 +833,6 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, GetSplitVector(Vec, Lo, 
Hi); EVT VecVT = Vec.getValueType(); - EVT VecElemVT = VecVT.getVectorElementType(); unsigned VecElems = VecVT.getVectorNumElements(); unsigned SubElems = SubVec.getValueType().getVectorNumElements(); @@ -872,7 +858,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Store the new subvector into the specified index. - SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx); + SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); @@ -882,7 +868,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); @@ -1003,7 +989,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Store the new element. This may be larger than the vector element type, // so use a truncating store. - SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = @@ -1014,7 +1000,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); @@ -1114,11 +1100,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, - ExtType); + ExtType, MLD->isExpandingLoad()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, + MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MLD->getPointerInfo(), @@ -1126,7 +1111,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, - ExtType); + ExtType, MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the @@ -1237,18 +1222,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); - SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1282,7 +1255,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, LLVMContext &Ctx = *DAG.getContext(); EVT NewSrcVT = EVT::getVectorVT( Ctx, EVT::getIntegerVT( - Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + Ctx, SrcVT.getScalarSizeInBits() * 2), NumElements); EVT SplitSrcVT = EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); @@ -1651,7 +1624,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Load back the required element. - StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo(), EltVT); } @@ -1769,19 +1742,18 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore()); - - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + N->isTruncatingStore(), + N->isCompressingStore()); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + N->isCompressingStore()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore()); + N->isTruncatingStore(), N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -1940,8 +1912,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // if we're trying to split it at all. assert() that's true, just in case. 
assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); - unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + unsigned InElementSize = InVT.getScalarSizeInBits(); + unsigned OutElementSize = OutVT.getScalarSizeInBits(); // If the input elements are only 1/2 the width of the result elements, // just use the normal splitting. Our trick only work if there's room @@ -2047,7 +2019,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; - case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; @@ -2695,86 +2666,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } -SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { - SDLoc dl(N); - SDValue InOp = N->getOperand(0); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); - - SDValue DTyOp = DAG.getValueType(WidenVT); - SDValue STyOp = DAG.getValueType(InWidenVT); - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - - unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { - InOp = GetWidenedVector(InOp); - InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (TLI.isTypeLegal(InWidenVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { - // Widen the input and call convert on the widened input vector. - unsigned NumConcat = WidenNumElts/InVTNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - Ops[0] = InOp; - SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; - - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (InVTNumElts % WidenNumElts == 0) { - // Extract the input and convert the shorten input vector. - InOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - } - - // Otherwise unroll into some nasty scalar code and rebuild the vector. 
- SmallVector<SDValue, 16> Ops(WidenNumElts); - EVT EltVT = WidenVT.getVectorElementType(); - DTyOp = DAG.getValueType(EltVT); - STyOp = DAG.getValueType(InEltVT); - - unsigned MinElts = std::min(InVTNumElts, WidenNumElts); - unsigned i; - for (i=0; i < MinElts; ++i) { - SDValue ExtVal = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -2881,7 +2772,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), - N->getMemOperand(), ExtType); + N->getMemOperand(), ExtType, + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -3317,7 +3209,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), Mask, MST->getMemoryVT(), MST->getMemOperand(), - false); + false, MST->isCompressingStore()); } SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { @@ -3547,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. 
SmallVector<SDValue, 16> Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1e5c4a7..ded8e68 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -631,7 +631,7 @@ SUnit *ResourcePriorityQueue::pop() { void ResourcePriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + std::vector<SUnit *>::iterator I = find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 802c459..3549ccd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1339,7 +1339,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); if (Reg) { SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; - if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + if (!is_contained(LRegs, Reg)) continue; } SU->isPending = false; @@ -1704,8 +1704,7 @@ public: void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), - SU); + std::vector<SUnit *>::iterator I = find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5cc8066..a058942 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -15,10 +15,20 @@ #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H #define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Casting.h" +#include <cassert> +#include <string> +#include <vector> namespace llvm { + +class InstrItineraryData; + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. /// /// Edges between SUnits are initially based on edges in the SelectionDAG, @@ -44,7 +54,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - ~ScheduleDAGSDNodes() override {} + ~ScheduleDAGSDNodes() override = default; /// Run - perform scheduling. 
/// @@ -131,6 +141,7 @@ namespace llvm { unsigned DefIdx; unsigned NodeNumDefs; MVT ValueType; + public: RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); @@ -150,6 +161,7 @@ namespace llvm { } void Advance(); + private: void InitNodeNumDefs(); }; @@ -175,6 +187,7 @@ namespace llvm { void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 29d11c7..e225ba8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -134,7 +134,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // we care if the resultant vector is all ones, not whether the individual // constants are. SDValue NotZero = N->getOperand(i); - unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { if (CN->getAPIntValue().countTrailingOnes() < EltSize) return false; @@ -173,7 +173,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // We only want to check enough bits to cover the vector elements, because // we care if the resultant vector is all zeros, not whether the individual // constants are. - unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { if (CN->getAPIntValue().countTrailingZeros() < EltSize) return false; @@ -403,7 +403,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); - ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -521,24 +520,6 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { AddNodeIDCustom(ID, N); } -/// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries volatility, temporalness, indexing mode, and -/// extension/truncation information. 
-/// -static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal, bool isInvariant) { - assert((ConvType & 3) == ConvType && - "ConvType may not require more than 2 bits!"); - assert((AM & 7) == AM && - "AM may not require more than 3 bits!"); - return ConvType | - (AM << 2) | - (isVolatile << 5) | - (isNonTemporal << 6) | - (isInvariant << 7); -} - //===----------------------------------------------------------------------===// // SelectionDAG Class //===----------------------------------------------------------------------===// @@ -1030,7 +1011,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); if (Op.getValueType() == VT) return Op; - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, @@ -1040,7 +1021,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + assert(VT.getSizeInBits() == Op.getValueSizeInBits() && "The sizes of the input and result must match in order to perform the " "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && @@ -1051,7 +1032,7 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + assert(VT.getSizeInBits() == Op.getValueSizeInBits() && "The sizes of the input and result must match in order to perform the " "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && @@ -1062,7 +1043,7 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + assert(VT.getSizeInBits() == Op.getValueSizeInBits() && "The sizes of the input and result must match in order to perform the " "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && @@ -1123,7 +1104,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); + APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } // In other cases the element type is illegal and needs to be expanded, for @@ -1149,7 +1130,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector<SDValue, 2> EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - 
.trunc(ViaEltSizeInBits), DL, + .zextOrTrunc(ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); } @@ -1166,12 +1147,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, // This situation occurs in MIPS MSA. SmallVector<SDValue, 8> Ops; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); - - SDValue Result = getNode(ISD::BITCAST, DL, VT, - getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops)); - return Result; + return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && @@ -1280,7 +1258,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); - ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); @@ -1333,7 +1310,9 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + Alignment = MF->getFunction()->optForSize() + ? getDataLayout().getABITypeAlignment(C->getType()) + : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1650,31 +1629,6 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, - SDValue DTy, SDValue STy, SDValue Rnd, - SDValue Sat, ISD::CvtCode Code) { - // If the src and dest types are the same and the conversion is between - // integer types of the same sign or two floats, no conversion is necessary. 
- if (DTy == STy && - (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) - return Val; - - FoldingSetNodeID ID; - SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; - AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); - void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) - return SDValue(E, 0); - - auto *N = - newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code); - createOperands(N, Ops); - - CSEMap.InsertNode(N, IP); - InsertNode(N); - return SDValue(N, 0); -} - SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); @@ -1863,13 +1817,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) { } SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { - MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } @@ -1881,8 +1835,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Align = std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); - MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); + MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } @@ -1943,29 +1897,29 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, default: break; case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpLessThan, dl, VT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || R==APFloat::cmpEqual, dl, VT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpEqual, dl, VT); case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); @@ -1998,11 +1952,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { - // This predicate is not safe for vector operations. 
- if (Op.getValueType().isVector()) - return false; - - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -2016,28 +1966,244 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, return (KnownZero & Mask) == Mask; } +/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that +/// is less than the element bit-width of the shift node, return it. +static const APInt *getValidShiftAmountConstant(SDValue V) { + if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.ult(V.getScalarValueSizeInBits())) + return &ShAmt; + } + return nullptr; +} + /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. +/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are +/// those that are shared by every vector element. void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); +} + +/// Determine which bits of Op are known to be either zero or one and return +/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows +/// us to only collect the known bits that are shared by the requested vector +/// elements. +/// TODO: We only support DemandedElts on a few opcodes so far, the remainder +/// should be added when they become necessary. +void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, const APInt &DemandedElts, + unsigned Depth) const { + unsigned BitWidth = Op.getScalarValueSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; + unsigned NumElts = DemandedElts.getBitWidth(); - switch (Op.getOpcode()) { + if (!DemandedElts) + return; // No demanded elts, better to assume we don't know anything. + + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { case ISD::Constant: // We know all of the bits for a constant! KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); KnownZero = ~KnownOne; break; + case ISD::BUILD_VECTOR: + // Collect the known bits that are shared by every demanded vector element. + assert(NumElts == Op.getValueType().getVectorNumElements() && + "Unexpected vector size"); + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + + SDValue SrcOp = Op.getOperand(i); + computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1); + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (SrcOp.getValueSizeInBits() != BitWidth) { + assert(SrcOp.getValueSizeInBits() > BitWidth && + "Expected BUILD_VECTOR implicit truncation"); + KnownOne2 = KnownOne2.trunc(BitWidth); + KnownZero2 = KnownZero2.trunc(BitWidth); + } + + // Known bits are the values that are shared by every demanded element. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + + // If we don't know any bits, early out. 
+ if (!KnownOne && !KnownZero) + break; + } + break; + case ISD::VECTOR_SHUFFLE: { + // Collect the known bits that are shared by every vector element referenced + // by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + + int M = SVN->getMaskElt(i); + if (M < 0) { + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + KnownOne.clearAllBits(); + KnownZero.clearAllBits(); + DemandedLHS.clearAllBits(); + DemandedRHS.clearAllBits(); + break; + } + + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + // Known bits are the values that are shared by every demanded element. + if (!!DemandedLHS) { + SDValue LHS = Op.getOperand(0); + computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + if (!!DemandedRHS) { + SDValue RHS = Op.getOperand(1); + computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + break; + } + case ISD::CONCAT_VECTORS: { + // Split DemandedElts and test each of the demanded subvectors. + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + EVT SubVectorVT = Op.getOperand(0).getValueType(); + unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); + unsigned NumSubVectors = Op.getNumOperands(); + for (unsigned i = 0; i != NumSubVectors; ++i) { + APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); + DemandedSub = DemandedSub.trunc(NumSubVectorElts); + if (!!DemandedSub) { + SDValue Sub = Op.getOperand(i); + computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + } + break; + } + case ISD::EXTRACT_SUBVECTOR: { + // If we know the element index, just demand that subvector elements, + // otherwise demand them all. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1); + } else { + computeKnownBits(Src, KnownZero, KnownOne, Depth + 1); + } + break; + } + case ISD::BITCAST: { + SDValue N0 = Op.getOperand(0); + unsigned SubBitWidth = N0.getScalarValueSizeInBits(); + + // Ignore bitcasts from floating point. + if (!N0.getValueType().isInteger()) + break; + + // Fast handling of 'identity' bitcasts. + if (BitWidth == SubBitWidth) { + computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1); + break; + } + + // Support big-endian targets when it becomes useful. + bool IsLE = getDataLayout().isLittleEndian(); + if (!IsLE) + break; + + // Bitcast 'small element' vector to 'large element' scalar/vector. 
+ if ((BitWidth % SubBitWidth) == 0) { + assert(N0.getValueType().isVector() && "Expected bitcast from vector"); + + // Collect known bits for the (larger) output by collecting the known + // bits from each set of sub elements and shift these into place. + // We need to separately call computeKnownBits for each set of + // sub elements as the knownbits for each is likely to be different. + unsigned SubScale = BitWidth / SubBitWidth; + APInt SubDemandedElts(NumElts * SubScale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SubDemandedElts.setBit(i * SubScale); + + for (unsigned i = 0; i != SubScale; ++i) { + computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i), + Depth + 1); + KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i); + KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i); + } + } + + // Bitcast 'large element' scalar/vector to 'small element' vector. + if ((SubBitWidth % BitWidth) == 0) { + assert(Op.getValueType().isVector() && "Expected bitcast to vector"); + + // Collect known bits for the (smaller) output by collecting the known + // bits from the overlapping larger input elements and extracting the + // sub sections we actually care about. + unsigned SubScale = SubBitWidth / BitWidth; + APInt SubDemandedElts(NumElts / SubScale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SubDemandedElts.setBit(i / SubScale); + + computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1); + + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % SubScale) * BitWidth; + KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth); + KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + } + } + break; + } case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; @@ -2045,8 +2211,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero |= KnownZero2; break; case ISD::OR: - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; @@ -2054,8 +2222,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne |= KnownOne2; break; case ISD::XOR: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // Output known-0 bits are known if clear or set in both the LHS & RHS. 
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); @@ -2065,11 +2235,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; } case ISD::MUL: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. + // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. KnownOne.clearAllBits(); @@ -2089,12 +2261,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); unsigned LeadZ = KnownZero2.countLeadingOnes(); - KnownOne2.clearAllBits(); - KnownZero2.clearAllBits(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, @@ -2105,6 +2277,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::SELECT: computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. @@ -2113,6 +2288,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SELECT_CC: computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. @@ -2144,58 +2322,37 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::SHL: - // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); - - // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) - break; - - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero <<= ShAmt; - KnownOne <<= ShAmt; - // low bits known zero. - KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + KnownZero = KnownZero << *ShAmt; + KnownOne = KnownOne << *ShAmt; + // Low bits are known zero. 
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue()); } break; case ISD::SRL: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); - - // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) - break; - - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); - - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - KnownZero |= HighBits; // High bits known zero. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + KnownZero = KnownZero.lshr(*ShAmt); + KnownOne = KnownOne.lshr(*ShAmt); + // High bits are known zero. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue()); + KnownZero |= HighBits; } break; case ISD::SRA: - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); - - // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) - break; - - // If any of the demanded bits are produced by the sign extension, we also - // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); - - // Handle the sign bits. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + KnownZero = KnownZero.lshr(*ShAmt); + KnownOne = KnownOne.lshr(*ShAmt); + // If we know the value of the sign bit, then we know it is copied across + // the high bits by the shift amount. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue()); APInt SignBit = APInt::getSignBit(BitWidth); - SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. - + SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask. if (KnownZero.intersects(SignBit)) { KnownZero |= HighBits; // New bits are known zero. } else if (KnownOne.intersects(SignBit)) { @@ -2205,7 +2362,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - unsigned EBits = EVT.getScalarType().getSizeInBits(); + unsigned EBits = EVT.getScalarSizeInBits(); // Sign extension. Compute the demanded bits in the result that are not // present in the input. @@ -2220,7 +2377,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); KnownOne &= InputDemandedBits; KnownZero &= InputDemandedBits; @@ -2253,7 +2411,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If this is a ZEXTLoad and we are looking at the loaded value. 
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); - unsigned MemBits = VT.getScalarType().getSizeInBits(); + unsigned MemBits = VT.getScalarSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { if (LD->getExtensionType() == ISD::NON_EXTLOAD) @@ -2263,11 +2421,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -2275,30 +2434,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + unsigned InBits = InVT.getScalarSizeInBits(); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); - // Note if the sign bit is known to be zero or one. - bool SignBitKnownZero = KnownZero.isNegative(); - bool SignBitKnownOne = KnownOne.isNegative(); - - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - - // If the sign bit is known zero or one, the top bits match. - if (SignBitKnownZero) - KnownZero |= NewBits; - else if (SignBitKnownOne) - KnownOne |= NewBits; + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + KnownZero = KnownZero.sext(BitWidth); + KnownOne = KnownOne.sext(BitWidth); break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); @@ -2308,10 +2459,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); break; @@ -2330,7 +2482,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SUB: { - if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) { // We know that the top bits of C-X are clear if X contains less bits // than C (i.e. no wrap-around can happen). For example, 20-X is // positive if we can prove that X is >= 0 and < 16. 
@@ -2338,7 +2490,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -2350,8 +2503,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } } } + LLVM_FALLTHROUGH; } - // fall through case ISD::ADD: case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits @@ -2361,17 +2514,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // known to be clear. For example, if one input has the top 10 bits clear // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); KnownZeroHigh = std::min(KnownZeroHigh, KnownZero2.countLeadingOnes()); KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); - if (Op.getOpcode() == ISD::ADD) { + if (Opcode == ISD::ADD) { KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); if (KnownZeroHigh > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); @@ -2387,11 +2542,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; } case ISD::SREM: - if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2411,11 +2567,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } break; case ISD::UREM: { - if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // The upper bits are all zero, the lower ones are unchanged. KnownZero = KnownZero2 | ~LowBits; @@ -2426,8 +2583,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. 
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -2437,9 +2596,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::EXTRACT_ELEMENT: { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - const unsigned Index = - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - const unsigned BitWidth = Op.getValueType().getSizeInBits(); + const unsigned Index = Op.getConstantOperandVal(1); + const unsigned BitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); @@ -2450,8 +2608,74 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne.trunc(BitWidth); break; } + case ISD::EXTRACT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue EltNo = Op.getOperand(1); + EVT VecVT = InVec.getValueType(); + const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); + const unsigned NumSrcElts = VecVT.getVectorNumElements(); + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know + // anything about the extended bits. + if (BitWidth > EltBitWidth) { + KnownZero = KnownZero.trunc(EltBitWidth); + KnownOne = KnownOne.trunc(EltBitWidth); + } + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { + // If we know the element index, just demand that vector element. + unsigned Idx = ConstEltNo->getZExtValue(); + APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); + computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1); + } else { + // Unknown element index, so ignore DemandedElts and demand them all. + computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); + } + if (BitWidth > EltBitWidth) { + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + } + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then add its common known bits. + if (DemandedElts[EltIdx]) { + computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); + KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); + KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + } + + // If we demand the source vector then add its common known bits, ensuring + // that we don't demand the inserted element. + APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); + if (!!VectorElts) { + computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. 
+ computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); + computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); + KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); + KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + } + break; + } case ISD::BSWAP: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); KnownZero = KnownZero2.byteSwap(); KnownOne = KnownOne2.byteSwap(); break; @@ -2460,13 +2684,15 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: { - APInt Op0Zero, Op0One; - APInt Op1Zero, Op1One; - computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth); - computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth); - - KnownZero = Op0Zero & Op1Zero; - KnownOne = Op0One & Op1One; + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; break; } case ISD::FrameIndex: @@ -2479,9 +2705,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; default: - if (Op.getOpcode() < ISD::BUILTIN_OP_END) + if (Opcode < ISD::BUILTIN_OP_END) break; - // Fallthrough + LLVM_FALLTHROUGH; case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: @@ -2494,6 +2720,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { + EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarSizeInBits(); + + // Is the constant a known power of 2? + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val)) + return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + // A left-shift of a constant one will have exactly one bit set because // shifting the bit off the end is undefined. if (Val.getOpcode() == ISD::SHL) { @@ -2510,12 +2743,19 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { return true; } + // Are all operands of a build vector constant powers of two? + if (Val.getOpcode() == ISD::BUILD_VECTOR) + if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) + return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + return false; + })) + return true; + // More could be done here, though the above checks are enough // to handle some common cases. // Fall back to computeKnownBits to catch other known cases. 
- EVT OpVT = Val.getValueType(); - unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt KnownZero, KnownOne; computeKnownBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && @@ -2525,7 +2765,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); - unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned VTBits = VT.getScalarSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2547,14 +2787,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { } case ISD::SIGN_EXTEND: - Tmp = - VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: // Max of the input and what this extends. - Tmp = - cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); Tmp = VTBits-Tmp+1; Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); @@ -2563,17 +2801,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); // SRA X, C -> adds C sign bits. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - Tmp += C->getZExtValue(); - if (Tmp > VTBits) Tmp = VTBits; + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { + APInt ShiftVal = C->getAPIntValue(); + ShiftVal += Tmp; + Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); } return Tmp; case ISD::SHL: - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { // shl destroys sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (C->getZExtValue() >= VTBits || // Bad shift. - C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + if (C->getAPIntValue().uge(VTBits) || // Bad shift. + C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. return Tmp - C->getZExtValue(); } break; @@ -2679,7 +2918,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { if (Tmp2 == 1) return 1; // Handle NEG. - if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); @@ -2701,25 +2940,50 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; - case ISD::TRUNCATE: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. + case ISD::TRUNCATE: { + // Check if the sign bits of source go down as far as the truncated value. 
+ unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); + unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (NumSrcSignBits > (NumSrcBits - VTBits)) + return NumSrcSignBits - (NumSrcBits - VTBits); break; + } case ISD::EXTRACT_ELEMENT: { const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); - const int BitWidth = Op.getValueType().getSizeInBits(); - const int Items = - Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + const int BitWidth = Op.getValueSizeInBits(); + const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; // Get reverse index (starting from 1), Op1 value indexes elements from // little end. Sign starts at big end. - const int rIndex = Items - 1 - - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const int rIndex = Items - 1 - Op.getConstantOperandVal(1); // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } + case ISD::EXTRACT_VECTOR_ELT: { + // At the moment we keep this simple and skip tracking the specific + // element. This way we get the lowest common denominator for all elements + // of the vector. + // TODO: get information for given vector element + const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); + // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know + // anything about sign bits. But if the sizes match we can derive knowledge + // about sign bits from the vector operand. + if (BitWidth == EltBitWidth) + return ComputeNumSignBits(Op.getOperand(0), Depth+1); + break; + } + case ISD::EXTRACT_SUBVECTOR: + return ComputeNumSignBits(Op.getOperand(0), Depth + 1); + case ISD::CONCAT_VECTORS: + // Determine the minimum number of sign bits across all input vectors. + // Early out if the result is already 1. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) + Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1)); + return Tmp; } // If we are looking at the loaded value of the SDNode. @@ -2730,10 +2994,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { switch (ExtType) { default: break; case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarSizeInBits(); return VTBits-Tmp+1; case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarSizeInBits(); return VTBits-Tmp; } } @@ -2842,6 +3106,16 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, llvm::SelectionDAG &DAG) { + assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); + assert(llvm::all_of(Ops, + [Ops](SDValue Op) { + return Ops[0].getValueType() == Op.getValueType(); + }) && + "Concatenation of vectors with inconsistent value types!"); + assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) == + VT.getVectorNumElements() && + "Incorrect element count in vector concatenation!"); + if (Ops.size() == 1) return Ops[0]; @@ -2875,7 +3149,7 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ? 
DAG.getZExtOrTrunc(Op, DL, SVT) : DAG.getSExtOrTrunc(Op, DL, SVT); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts); + return DAG.getBuildVector(VT, DL, Elts); } /// Gets or creates the specified node. @@ -2924,13 +3198,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } case ISD::BITCAST: if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) - return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) - return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -3162,8 +3436,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::BITCAST: // Basic sanity checking. - assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() - && "Cannot BITCAST between types of different sizes!"); + assert(VT.getSizeInBits() == Operand.getValueSizeInBits() && + "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); @@ -3333,25 +3607,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT SVT = VT.getScalarType(); SmallVector<SDValue, 4> Outputs; for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); - ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); - if (!V1 || !V2) // Not a constant, bail. - return SDValue(); - - if (V1->isOpaque() || V2->isOpaque()) - return SDValue(); + SDValue V1 = BV1->getOperand(I); + SDValue V2 = BV2->getOperand(I); // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncating the APInts. + // FIXME: This is valid and could be handled by truncation. if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) return SDValue(); // Fold one vector element. - std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), - V2->getAPIntValue()); - if (!Folded.second) + SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) return SDValue(); - Outputs.push_back(getConstant(Folded.first, DL, SVT)); + Outputs.push_back(ScalarResult); } assert(VT.getVectorNumElements() == Outputs.size() && @@ -3394,8 +3665,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // All operands must be vector types with the same number of elements as // the result type and must be either UNDEF or a build vector of constant // or UNDEF scalars. 
- if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || - !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + if (!all_of(Ops, IsConstantBuildVectorOrUndef) || + !all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); // If we are comparing vectors, then the result needs to be a i1 boolean @@ -3577,8 +3848,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). - assert(N2.getValueType().getSizeInBits() >= - Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to @@ -3609,7 +3879,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && VT.bitsLE(N1.getValueType()) && - N2C && "Invalid FP_ROUND!"); + N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && + "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: @@ -3640,7 +3911,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (EVT == VT) return N1; // Not actually extending auto SignExtendInReg = [&](APInt Val) { - unsigned FromBits = EVT.getScalarType().getSizeInBits(); + unsigned FromBits = EVT.getScalarSizeInBits(); Val <<= Val.getBitWidth() - FromBits; Val = Val.ashr(Val.getBitWidth() - FromBits); return getConstant(Val, DL, VT.getScalarType()); @@ -3768,6 +4039,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Trivial extraction. if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; + + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created + // during shuffle legalization. + if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && + VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); } break; } @@ -3868,7 +4145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). 
return getConstant(0, DL, VT); - // fallthrough + LLVM_FALLTHROUGH; case ISD::ADD: case ISD::ADDC: case ISD::ADDE: @@ -3977,6 +4254,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); + case ISD::INSERT_VECTOR_ELT: { + ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); + // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF + if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + break; + } case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -4072,7 +4356,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(!Value.isUndef()); - unsigned NumBits = VT.getScalarType().getSizeInBits(); + unsigned NumBits = VT.getScalarSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { assert(C->getAPIntValue().getBitWidth() == 8); APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); @@ -4306,10 +4590,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); - if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) @@ -4342,8 +4626,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. - if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) - MFI->setObjectAlignment(FI->getIndex(), NewAlign); + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); Align = NewAlign; } } @@ -4422,10 +4706,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); - if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) @@ -4445,8 +4729,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) - MFI->setObjectAlignment(FI->getIndex(), NewAlign); + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); Align = NewAlign; } } @@ -4519,10 +4803,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); - if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); @@ -4538,8 +4822,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. - if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) - MFI->setObjectAlignment(FI->getIndex(), NewAlign); + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); Align = NewAlign; } } @@ -4796,10 +5080,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO, - AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); AddNodeIDNode(ID, Opcode, VTList, Ops); @@ -4811,8 +5092,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, } auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), - VTList, MemVT, MMO, SuccessOrdering, - FailureOrdering, SynchScope); + VTList, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -4820,14 +5100,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, - SDVTList VTList, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, - Ordering, SynchScope); -} - SDValue SelectionDAG::getAtomicCmpSwap( unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, @@ -4847,26 +5119,23 @@ SDValue SelectionDAG::getAtomicCmpSwap( auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | MachineMemOperand::MOStore; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + AAMDNodes(), nullptr, SynchScope, SuccessOrdering, + FailureOrdering); - return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO, - SuccessOrdering, FailureOrdering, SynchScope); + return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); } SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, - MachineMemOperand *MMO, - 
AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_CMP_SWAP || Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, - SuccessOrdering, FailureOrdering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); } SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, @@ -4892,16 +5161,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, - MemVT.getStoreSize(), Alignment); + MemVT.getStoreSize(), Alignment, AAMDNodes(), + nullptr, SynchScope, Ordering); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO, - Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); } SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || @@ -4921,18 +5189,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Val}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); } SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. @@ -5056,7 +5323,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(VT); + Alignment = getEVTAlignment(MemVT); MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); @@ -5101,9 +5368,8 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( + dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5160,12 +5426,14 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, ISD::MemIndexedMode AM) { LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); - // Don't propagate the invariant flag. 
+ // Don't propagate the invariant or dereferenceable flags. auto MMOFlags = - LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + LD->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->getAlignment(), MMOFlags); + LD->getMemoryVT(), LD->getAlignment(), MMOFlags, + LD->getAAInfo()); } SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, @@ -5200,8 +5468,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5265,8 +5533,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5311,17 +5579,15 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, - ISD::LoadExtType ExtTy) { + ISD::LoadExtType ExtTy, bool isExpanding) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED, - MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( + dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5329,7 +5595,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, MemVT, MMO); + ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -5340,7 +5606,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, - bool isTrunc) { + bool IsTruncating, bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); @@ -5349,8 +5615,8 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), 
- MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( + dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5358,7 +5624,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - isTrunc, MemVT, MMO); + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -5374,10 +5640,8 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, - MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( + dl.getIROrder(), VTs, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5411,9 +5675,8 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( + dl.getIROrder(), VTs, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5545,7 +5808,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { // If the and is only masking out bits that cannot effect the shift, // eliminate the and. 
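[Editor's aside, not part of the patch] For readers unfamiliar with the MGATHER/MSCATTER nodes whose creation is updated above, a minimal scalar model of their semantics (one independent memory access per active mask lane; this is only an illustration, not the DAG implementation):

#include <cstddef>
#include <iostream>
#include <vector>

// Gather: each active lane reads through its own pointer; inactive lanes
// keep the pass-through value.
void gather(const std::vector<const int *> &Ptrs, const std::vector<bool> &Mask,
            const std::vector<int> &PassThru, std::vector<int> &Dst) {
  for (std::size_t i = 0; i < Mask.size(); ++i)
    Dst[i] = Mask[i] ? *Ptrs[i] : PassThru[i];
}

// Scatter: each active lane writes through its own pointer; inactive lanes
// write nothing.
void scatter(const std::vector<int *> &Ptrs, const std::vector<bool> &Mask,
             const std::vector<int> &Val) {
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      *Ptrs[i] = Val[i];
}

int main() {
  int A = 1, B = 2, C = 3, D = 4;
  std::vector<const int *> Srcs{&A, &B, &C, &D};
  std::vector<bool> Mask{true, false, true, false};
  std::vector<int> Pass{9, 9, 9, 9}, Out(4);
  gather(Srcs, Mask, Pass, Out);
  for (int V : Out) std::cout << V << ' ';   // 1 9 3 9
  std::cout << '\n';
}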
- unsigned NumBits = VT.getScalarType().getSizeInBits()*2; + unsigned NumBits = VT.getScalarSizeInBits()*2; if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } @@ -5870,21 +6133,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, EVT VT3, EVT VT4, - ArrayRef<SDValue> Ops) { - SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, - SDValue Op1) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); @@ -5893,24 +6141,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, - SDValue Op1, SDValue Op2, - SDValue Op3) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, EVT VT3, - SDValue Op1, SDValue Op2, - SDValue Op3) { - SDVTList VTs = getVTList(VT1, VT2, VT3); - SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,ArrayRef<SDValue> Ops) { SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. @@ -5922,14 +6152,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return New; } -/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away /// the line number information on the merged node since it is not possible to /// preserve the information that operation is associated with multiple lines. /// This will make the debugger working better at -O0, were there is a higher /// probability having other instructions associated with that line. 
/// /// For IROrder, we keep the smaller of the two -SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) { +SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { DebugLoc NLoc = N->getDebugLoc(); if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); @@ -5963,7 +6193,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) - return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); + return UpdateSDLocOnMergeSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) @@ -6050,19 +6280,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, - EVT VT1, EVT VT2) { - SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, None); -} - -MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, - EVT VT1, EVT VT2, SDValue Op1) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops); -} - -MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); @@ -6110,13 +6327,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, - EVT VT1, EVT VT2, EVT VT3, EVT VT4, - ArrayRef<SDValue> Ops) { - SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getMachineNode(Opcode, dl, VTs, Ops); -} - -MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(ResultTys); @@ -6135,7 +6345,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, AddNodeIDNode(ID, ~Opcode, VTs, Ops); IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { - return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); + return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL)); } } @@ -6255,6 +6465,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { "Cannot replace with this method!"); assert(From != To.getNode() && "Cannot replace uses of with self"); + // Preserve Debug Values + TransferDbgValues(FromN, To); + // Iterate over all the existing uses of From. New uses will be added // to the beginning of the use list, which we avoid visiting. // This specifically avoids visiting uses of From that arise while the @@ -6285,8 +6498,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } - // Preserve Debug Values - TransferDbgValues(FromN, To); // If we just RAUW'd the root, take note. if (FromN == getRoot()) @@ -6689,6 +6900,40 @@ bool llvm::isBitwiseNot(SDValue V) { return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1)); } +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here. + // FIXME: We blindly ignore splats which include undef which is overly + // pessimistic. 
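[Editor's aside, not part of the patch] The renamed SDLoc-merging helper above implements a simple policy: at -O0, when CSE merges two nodes carrying different source locations, drop the location rather than arbitrarily picking one, so the debugger does not attribute the merged operation to the wrong line. A toy standalone model of that policy, assuming C++14 and invented types:

#include <iostream>

struct Loc {
  int Line = 0;                 // 0 means "no location"
  bool valid() const { return Line != 0; }
};

Loc mergeLocations(Loc Existing, Loc Incoming, bool OptimizationsOff) {
  if (OptimizationsOff && Existing.valid() && Incoming.valid() &&
      Existing.Line != Incoming.Line)
    return Loc{};               // ambiguous: better to show no location at all
  return Existing;
}

int main() {
  std::cout << mergeLocations({10}, {12}, true).Line << "\n";  // 0
  std::cout << mergeLocations({10}, {10}, true).Line << "\n";  // 10
}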
+ if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) + return CN; + } + + return nullptr; +} + +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + + if (CN && UndefElements.none()) + return CN; + } + + return nullptr; +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -6710,11 +6955,11 @@ AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant()); - assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); - assert(isNonTemporal() == MMO->isNonTemporal() && - "Non-temporal encoding error!"); + MemSDNodeBits.IsVolatile = MMO->isVolatile(); + MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); + MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); + MemSDNodeBits.IsInvariant = MMO->isInvariant(); + // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. @@ -6939,8 +7184,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { for (; i < ResNE; ++i) Scalars.push_back(getUNDEF(EltVT)); - return getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); + EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); + return getBuildVector(VecVT, dl, Scalars); } bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, @@ -6962,13 +7207,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; - const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); - int FS = MFI->getObjectSize(FI); - int BFS = MFI->getObjectSize(BFI); + int FS = MFI.getObjectSize(FI); + int BFS = MFI.getObjectSize(BFI); if (FS != BFS || FS != (int)Bytes) return false; - return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); } // Handle X + C. @@ -7033,7 +7278,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { } if (FrameIdx != (1 << 31)) { - const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); return FIInfoAlign; @@ -7124,7 +7369,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, // false. unsigned int nOps = getNumOperands(); assert(nOps > 0 && "isConstantSplat has 0-size build vector"); - unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + unsigned EltBitSize = VT.getScalarSizeInBits(); for (unsigned j = 0; j < nOps; ++j) { unsigned i = isBigEndian ? 
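[Editor's aside, not part of the patch] The new isConstOrConstSplat / isConstOrConstSplatFP helpers above answer "is this value a constant, or a BUILD_VECTOR that splats a single constant?". A rough scalar analogue of the splat test, ignoring the undef-lane and truncation subtleties the real code has to handle (requires C++17 for std::optional):

#include <iostream>
#include <optional>
#include <vector>

// Return the splatted value if every element is the same constant,
// nothing otherwise.
std::optional<int> getConstantSplat(const std::vector<int> &Elts) {
  if (Elts.empty())
    return std::nullopt;
  for (int V : Elts)
    if (V != Elts.front())
      return std::nullopt;
  return Elts.front();
}

int main() {
  std::cout << getConstantSplat({7, 7, 7, 7}).value_or(-1) << "\n"; // 7
  std::cout << getConstantSplat({7, 7, 1, 7}).value_or(-1) << "\n"; // -1
}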
nOps-1-j : j; @@ -7265,6 +7510,16 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { return nullptr; } +SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + + return nullptr; +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e1fc37d..996c95b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -183,7 +184,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + DAG.getConstant(Lo.getValueSizeInBits(), DL, TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -833,8 +834,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. - assert(DAG.getMachineFunction().getFrameInfo()-> - hasOpaqueSPAdjustment()); + assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); } } } @@ -931,46 +931,9 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } -/// Copy swift error to the final virtual register at end of a basic block, as -/// specified by SwiftErrorWorklist, if necessary. -static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) { - const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo(); - if (!TLI.supportSwiftError()) - return; - - if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB)) - return; - - // Go through entries in SwiftErrorWorklist, and create copy as necessary. - FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry = - SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB]; - FunctionLoweringInfo::SwiftErrorVRegs &MapEntry = - SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB]; - for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) { - unsigned WorkReg = WorklistEntry[I]; - - // Find the swifterror virtual register for the value in SwiftErrorMap. - unsigned MapReg = MapEntry[I]; - assert(TargetRegisterInfo::isVirtualRegister(MapReg) && - "Entries in SwiftErrorMap should be virtual registers"); - - if (WorkReg == MapReg) - continue; - - // Create copy from SwiftErrorMap to SwiftWorklist. - auto &DL = SDB.DAG.getDataLayout(); - SDValue CopyNode = SDB.DAG.getCopyToReg( - SDB.getRoot(), SDB.getCurSDLoc(), WorkReg, - SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL)))); - MapEntry[I] = WorkReg; - SDB.DAG.setRoot(CopyNode); - } -} - void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. 
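[Editor's aside, not part of the patch] The getCopyFromParts hunk above reassembles a value that was split across two register parts by shifting the high part up by the low part's width and OR-ing the halves together. A scalar model of the same arithmetic:

#include <cstdint>
#include <iostream>

// Zero-extend the low part, shift the high part up by the low part's
// width, then OR them together.
uint64_t combineParts(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | static_cast<uint64_t>(Lo);
}

int main() {
  // Prints 1234567deadbeef.
  std::cout << std::hex << combineParts(0xdeadbeef, 0x01234567) << "\n";
}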
if (isa<TerminatorInst>(&I)) { - copySwiftErrorsToFinalVRegs(*this); HandlePHINodesInSuccessorBlocks(I.getParent()); } @@ -1021,8 +984,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, if (Val.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { - SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - false, Offset, dl, DbgSDNodeOrder); + SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1491,6 +1453,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const Function *F = I.getParent()->getParent(); if (TLI.supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + assert(FuncInfo.SwiftErrorArg && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, @@ -1498,7 +1461,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. - OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0], + OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( + FuncInfo.MBB, FuncInfo.SwiftErrorArg), EVT(TLI.getPointerTy(DL)))); } @@ -2012,7 +1976,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, if (Global) { MachinePointerInfo MPInfo(Global); MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); Node->setMemRefs(MemRefs, MemRefs + 1); @@ -2033,8 +1998,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); - MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); - int FI = MFI->getStackProtectorIndex(); + MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI.getStackProtectorIndex(); SDValue Guard; SDLoc dl = getCurSDLoc(); @@ -2329,8 +2294,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - AddLandingPadInfo(LP, MMI, MBB); + addLandingPadInfo(LP, *MBB); // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. @@ -2484,7 +2448,7 @@ static bool isVectorReductionOp(const User *I) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) if (FPOp->getFastMathFlags().unsafeAlgebra()) break; - // Fall through. + LLVM_FALLTHROUGH; default: return false; } @@ -2639,7 +2603,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); - unsigned Op2Size = Op2.getValueType().getSizeInBits(); + unsigned Op2Size = Op2.getValueSizeInBits(); SDLoc DL = getCurSDLoc(); // If the operand is smaller than the shift count type, promote it. 
@@ -2650,7 +2614,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); // Otherwise we'll need to temporarily settle for some other convenient // type. Type legalization will make adjustments once the shiftee is split. @@ -2731,7 +2695,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { // Check if the condition of the select has one use or two users that are both // selects with the same condition. static bool hasOnlySelectUsers(const Value *Cond) { - return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) { + return all_of(Cond->users(), [](const Value *V) { return isa<SelectInst>(V); }); } @@ -2998,6 +2962,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + SDLoc DL = getCurSDLoc(); SmallVector<int, 8> Mask; ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); @@ -3009,54 +2974,60 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask)); + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; } // Normalize the shuffle vector since mask and vector length don't match. - if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { - // Mask is longer than the source vectors and is a multiple of the source - // vectors. We can use concatenate vector to make the mask and vectors - // lengths match. - - unsigned NumConcat = MaskNumElts / SrcNumElts; - - // Check if the shuffle is some kind of concatenation of the input vectors. - bool IsConcat = true; - SmallVector<int, 8> ConcatSrcs(NumConcat, -1); - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < 0) - continue; - // Ensure the indices in each SrcVT sized piece are sequential and that - // the same source is used for the whole piece. - if ((Idx % SrcNumElts != (i % SrcNumElts)) || - (ConcatSrcs[i / SrcNumElts] >= 0 && - ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { - IsConcat = false; - break; + if (SrcNumElts < MaskNumElts) { + // Mask is longer than the source vectors. We can use concatenate vector to + // make the mask and vectors lengths match. + + if (MaskNumElts % SrcNumElts == 0) { + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = MaskNumElts / SrcNumElts; + bool IsConcat = true; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; + } + // Remember which source this index came from. 
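[Editor's aside, not part of the patch] The visitShift hunk above truncates an oversized shift-amount operand early when the narrower shift-count type can still encode every meaningful shift amount. Roughly, that is a check that the narrow type has at least ceil(log2(width of the shifted value)) bits; a standalone sketch of that test:

#include <iostream>

// ceil(log2(N)): the number of bits needed to encode any amount in [0, N).
unsigned log2Ceil(unsigned N) {
  unsigned Bits = 0;
  for (unsigned V = N - 1; V != 0; V >>= 1)
    ++Bits;
  return Bits;
}

// Can a ShiftTyBits-wide amount still represent every shift that is not
// already out of range for a ValueBits-wide shiftee?
bool canTruncateShiftAmount(unsigned ShiftTyBits, unsigned ValueBits) {
  return ShiftTyBits >= log2Ceil(ValueBits);
}

int main() {
  std::cout << canTruncateShiftAmount(8, 64) << "\n";  // 1: 8 bits hold 0..63
  std::cout << canTruncateShiftAmount(4, 64) << "\n";  // 0: needs at least 6 bits
}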
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; } - // Remember which source this index came from. - ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; - } - // The shuffle is concatenating multiple vectors together. Just emit - // a CONCAT_VECTORS operation. - if (IsConcat) { - SmallVector<SDValue, 8> ConcatOps; - for (auto Src : ConcatSrcs) { - if (Src < 0) - ConcatOps.push_back(DAG.getUNDEF(SrcVT)); - else if (Src == 0) - ConcatOps.push_back(Src1); - else - ConcatOps.push_back(Src2); + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. + if (IsConcat) { + SmallVector<SDValue, 8> ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); + } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); + return; } - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, ConcatOps)); - return; } + unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); + unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; + EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + PaddedMaskNumElts); + // Pad both vectors with undefs to make them the same length as the mask. SDValue UndefVal = DAG.getUNDEF(SrcVT); @@ -3065,24 +3036,32 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1.isUndef() ? DAG.getUNDEF(VT) - : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps1); - Src2 = Src2.isUndef() ? DAG.getUNDEF(VT) - : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps2); + Src1 = Src1.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); + Src2 = Src2.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. - SmallVector<int, 8> MappedOps; + SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= (int)SrcNumElts) - Idx -= SrcNumElts - MaskNumElts; - MappedOps.push_back(Idx); + Idx -= SrcNumElts - PaddedMaskNumElts; + MappedOps[i] = Idx; } - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - MappedOps)); + SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); + + // If the concatenated vector was padded, extract a subvector with the + // correct number of elements. + if (MaskNumElts != PaddedMaskNumElts) + Result = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + setValue(&I, Result); return; } @@ -3141,10 +3120,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else { - SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Src, - DAG.getConstant(StartIdx[Input], dl, + ISD::EXTRACT_SUBVECTOR, DL, VT, Src, + DAG.getConstant(StartIdx[Input], DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -3162,8 +3140,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - MappedOps)); + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); return; } } @@ -3173,7 +3150,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. 
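[Editor's aside, not part of the patch] The shuffle normalization above now pads both sources up to alignTo(MaskNumElts, SrcNumElts) lanes before building the wider shuffle, then extracts the original width back out. Because the padded second source starts further into the concatenated pair, indices that referred to it must shift up by the padding amount. A small standalone model of that remapping, using -1 for undef lanes:

#include <iostream>
#include <vector>

// Round Value up to the next multiple of Align (Align > 0).
unsigned alignTo(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}

std::vector<int> remapMask(const std::vector<int> &Mask, unsigned SrcNumElts,
                           unsigned PaddedNumElts) {
  std::vector<int> Out(PaddedNumElts, -1);
  for (unsigned i = 0; i < Mask.size(); ++i) {
    int Idx = Mask[i];
    if (Idx >= (int)SrcNumElts)
      Idx += PaddedNumElts - SrcNumElts;  // second source moved up after padding
    Out[i] = Idx;
  }
  return Out;
}

int main() {
  // A 6-element mask over 4-element sources pads to alignTo(6, 4) == 8 lanes;
  // index 5 (lane 1 of source two) becomes 9.
  for (int V : remapMask({0, 5, 2, 7, -1, 1}, 4, alignTo(6, 4)))
    std::cout << V << ' ';                 // 0 9 2 11 -1 1 -1 -1
  std::cout << '\n';
}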
EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3185,14 +3161,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3293,13 +3269,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); - SmallVector<SDValue, 16> Ops(VectorWidth, N); - N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + N = DAG.getSplatBuildVector(VT, dl, N); } + for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); - if (StructType *StTy = dyn_cast<StructType>(*GTI)) { + if (StructType *StTy = GTI.getStructTypeOrNull()) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset @@ -3331,8 +3307,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); // In an inbouds GEP with an offset that is nonnegative even when @@ -3350,9 +3327,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && VectorWidth) { MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); - SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); - IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } + // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -3433,7 +3410,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); + assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3462,17 +3439,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - - // The IR notion of invariant_load only guarantees that all *non-faulting* - // invariant loads result in the same value. The MI notion of invariant load - // guarantees that the load can be legally moved to any location within its - // containing function. 
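[Editor's aside, not part of the patch] The visitGetElementPtr hunk above is the usual GEP address arithmetic, now splatting scalar operands for vector GEPs via getSplatBuildVector: struct fields contribute a precomputed byte offset, while array or pointer indices contribute index times element size. A toy scalar model of that offset computation, with invented types:

#include <cstdint>
#include <iostream>
#include <vector>

struct Step {
  bool IsStructField;
  int64_t FieldOffset;   // used when IsStructField
  int64_t Index;         // used otherwise
  int64_t ElementSize;   // used otherwise
};

int64_t gepOffset(const std::vector<Step> &Steps) {
  int64_t Offset = 0;
  for (const Step &S : Steps)
    Offset += S.IsStructField ? S.FieldOffset : S.Index * S.ElementSize;
  return Offset;
}

int main() {
  // &p[2].field_at_8 with 24-byte elements: 2*24 + 8 = 56.
  std::cout << gepOffset({{false, 0, 2, 24}, {true, 8, 0, 0}}) << "\n";
}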
The MI notion of invariant_load is stronger than the - // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load - // with a guarantee that the location being loaded from is dereferenceable - // throughout the function's lifetime. - - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, DAG.getDataLayout()); + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -3540,6 +3508,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MONonTemporal; if (isInvariant) MMOFlags |= MachineMemOperand::MOInvariant; + if (isDereferenceable) + MMOFlags |= MachineMemOperand::MODereferenceable; SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3585,7 +3555,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3613,9 +3583,9 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "expect a single EVT for swifterror"); // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT - SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(), - FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV), - ValueVTs[0]); + SDValue L = DAG.getCopyFromReg( + getRoot(), getCurSDLoc(), + FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); setValue(&I, L); } @@ -3697,16 +3667,39 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } -void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, + bool IsCompressing) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignment, Mask) - Value *PtrOperand = I.getArgOperand(1); + auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Mask = I.getArgOperand(3); + }; + auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.compressstore.*(Src0, Ptr, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Mask = I.getArgOperand(2); + Alignment = 0; + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsCompressing) + getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + SDValue Ptr = getValue(PtrOperand); - SDValue Src0 = getValue(I.getArgOperand(0)); - SDValue Mask = getValue(I.getArgOperand(3)); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + EVT VT = Src0.getValueType(); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3719,7 +3712,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { MachineMemOperand::MOStore, 
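[Editor's aside, not part of the patch] visitMaskedStore above now serves both llvm.masked.store and llvm.masked.compressstore, distinguished by the IsCompressing flag. A scalar model of the difference between the two store flavours (illustration only):

#include <cstddef>
#include <iostream>
#include <vector>

// Masked store: active lanes write to their own positions.
void maskedStore(int *Mem, const std::vector<int> &Val,
                 const std::vector<bool> &Mask) {
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Mem[i] = Val[i];
}

// Compressing store: active lanes are packed contiguously from the base.
void compressStore(int *Mem, const std::vector<int> &Val,
                   const std::vector<bool> &Mask) {
  std::size_t j = 0;
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Mem[j++] = Val[i];
}

int main() {
  std::vector<int> Val{1, 2, 3, 4};
  std::vector<bool> Mask{true, false, true, false};
  int A[4] = {0, 0, 0, 0}, B[4] = {0, 0, 0, 0};
  maskedStore(A, Val, Mask);    // A = {1, 0, 3, 0}
  compressStore(B, Val, Mask);  // B = {1, 3, 0, 0}
  std::cout << A[2] << ' ' << B[1] << "\n";  // 3 3
}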
VT.getStoreSize(), Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false); + MMO, false /* Truncating */, + IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -3740,7 +3734,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { // extract the spalt value and use it as a uniform base. // In all other cases the function returns 'false'. // -static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, +static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; @@ -3777,8 +3771,7 @@ static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); - SmallVector<SDValue, 16> Ops(GEPWidth, Index); - Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); } return true; } @@ -3820,18 +3813,38 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { setValue(&I, Scatter); } -void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); - // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) - Value *PtrOperand = I.getArgOperand(0); + auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Mask = I.getArgOperand(2); + Src0 = I.getArgOperand(3); + }; + auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.expandload.*(Ptr, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = 0; + Mask = I.getArgOperand(1); + Src0 = I.getArgOperand(2); + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsExpanding) + getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + SDValue Ptr = getValue(PtrOperand); - SDValue Src0 = getValue(I.getArgOperand(3)); - SDValue Mask = getValue(I.getArgOperand(2)); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + EVT VT = Src0.getValueType(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3851,7 +3864,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) { SDValue OutChain = Load.getValue(1); DAG.setRoot(OutChain); @@ -4003,13 +4016,13 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { MachineMemOperand::MOLoad, VT.getStoreSize(), I.getAlignment() ? 
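[Editor's aside, not part of the patch] Symmetrically, visitMaskedLoad above now serves both llvm.masked.load and llvm.masked.expandload via the IsExpanding flag. A scalar model of the two load flavours; inactive lanes take the pass-through value in both cases:

#include <cstddef>
#include <iostream>
#include <vector>

// Masked load: each active lane reads from its own position.
std::vector<int> maskedLoad(const int *Mem, const std::vector<bool> &Mask,
                            const std::vector<int> &PassThru) {
  std::vector<int> Out(PassThru);
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Out[i] = Mem[i];
  return Out;
}

// Expanding load: consecutive memory elements are expanded into the
// active lanes.
std::vector<int> expandLoad(const int *Mem, const std::vector<bool> &Mask,
                            const std::vector<int> &PassThru) {
  std::vector<int> Out(PassThru);
  std::size_t j = 0;
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Out[i] = Mem[j++];
  return Out;
}

int main() {
  int Mem[4] = {10, 20, 30, 40};
  std::vector<bool> Mask{false, true, false, true};
  std::vector<int> Pass{0, 0, 0, 0};
  std::cout << maskedLoad(Mem, Mask, Pass)[3] << ' '    // 40
            << expandLoad(Mem, Mask, Pass)[3] << "\n";  // 20
}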
I.getAlignment() : - DAG.getEVTAlignment(VT)); + DAG.getEVTAlignment(VT), + AAMDNodes(), nullptr, Scope, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, - getValue(I.getPointerOperand()), MMO, - Order, Scope); + getValue(I.getPointerOperand()), MMO); SDValue OutChain = L.getValue(1); @@ -4047,8 +4060,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { - bool HasChain = !I.doesNotAccessMemory(); - bool OnlyLoad = HasChain && I.onlyReadsMemory(); + // Ignore the callsite's attributes. A specific call site may be marked with + // readnone, but the lowering code will expect the chain based on the + // definition. + const Function *F = I.getCalledFunction(); + bool HasChain = !F->doesNotAccessMemory(); + bool OnlyLoad = HasChain && F->onlyReadsMemory(); // Build the operand list. SmallVector<SDValue, 8> Ops; @@ -4156,7 +4173,7 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, const SDLoc &dl) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl, MVT::f32); } @@ -4743,6 +4760,32 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( return true; } +/// Return the appropriate SDDbgValue based on N. +SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, + DILocalVariable *Variable, + DIExpression *Expr, int64_t Offset, + DebugLoc dl, + unsigned DbgSDNodeOrder) { + SDDbgValue *SDV; + auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FISDN && Expr->startsWithDeref()) { + // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe + // stack slot locations as such instead of as indirectly addressed + // locations. + ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1, + Expr->elements_end()); + DIExpression *DerefedDIExpr = + DIExpression::get(*DAG.getContext(), TrailingElements); + int FI = FISDN->getIndex(); + SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, + DbgSDNodeOrder); + } else { + SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, + Offset, dl, DbgSDNodeOrder); + } + return SDV; +} + // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) @@ -4774,6 +4817,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; + case Intrinsic::addressofreturnaddress: + setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), @@ -4850,6 +4897,51 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } + case Intrinsic::memcpy_element_atomic: { + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue NumElements = getValue(I.getArgOperand(2)); + SDValue ElementSize = getValue(I.getArgOperand(3)); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = I.getArgOperand(2)->getType(); + Entry.Node = NumElements; + Args.push_back(Entry); + + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.Node = ElementSize; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = + cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl) + .setChain(getRoot()) + .setCallee(TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol( + TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); DILocalVariable *Variable = DI.getVariable(); @@ -4944,8 +5036,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, N)) { - SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - false, Offset, dl, SDNodeOrder); + SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4980,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; @@ -4988,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.getMachineFunction().setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), @@ -4996,7 +5087,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::eh_unwind_init: - DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); + DAG.getMachineFunction().setCallsUnwindInit(true); return nullptr; case Intrinsic::eh_dwarf_cfa: { setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, @@ -5015,11 +5106,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
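[Editor's aside, not part of the patch] The memcpy_element_atomic lowering above emits a runtime library call whose identity is keyed on the constant element size, and aborts for any size that has no libcall. A sketch of just that dispatch idea; the routine names below are invented for the illustration and are not the actual runtime symbols:

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

std::string selectElementAtomicMemcpyCall(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:  return "memcpy_element_atomic_1";
  case 2:  return "memcpy_element_atomic_2";
  case 4:  return "memcpy_element_atomic_4";
  case 8:  return "memcpy_element_atomic_8";
  case 16: return "memcpy_element_atomic_16";
  default:
    throw std::runtime_error("Unsupported element size");
  }
}

int main() {
  std::cout << selectElementAtomicMemcpyCall(4) << "\n";
}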
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); AllocaInst *FnCtx = cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; - MFI->setFunctionContextIndex(FI); + MFI.setFunctionContextIndex(FI); return nullptr; } case Intrinsic::eh_sjlj_setjmp: { @@ -5055,6 +5146,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; + case Intrinsic::masked_expandload: + visitMaskedLoad(I, true /* IsExpanding */); + return nullptr; + case Intrinsic::masked_compressstore: + visitMaskedStore(I, true /* IsCompressing */); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -5114,39 +5211,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } - case Intrinsic::convertff: - case Intrinsic::convertfsi: - case Intrinsic::convertfui: - case Intrinsic::convertsif: - case Intrinsic::convertuif: - case Intrinsic::convertss: - case Intrinsic::convertsu: - case Intrinsic::convertus: - case Intrinsic::convertuu: { - ISD::CvtCode Code = ISD::CVT_INVALID; - switch (Intrinsic) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::convertff: Code = ISD::CVT_FF; break; - case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; - case Intrinsic::convertfui: Code = ISD::CVT_FU; break; - case Intrinsic::convertsif: Code = ISD::CVT_SF; break; - case Intrinsic::convertuif: Code = ISD::CVT_UF; break; - case Intrinsic::convertss: Code = ISD::CVT_SS; break; - case Intrinsic::convertsu: Code = ISD::CVT_SU; break; - case Intrinsic::convertus: Code = ISD::CVT_US; break; - case Intrinsic::convertuu: Code = ISD::CVT_UU; break; - } - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), - DAG.getValueType(DestVT), - DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)), - Code); - setValue(&I, Res); - return nullptr; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -5368,7 +5432,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); @@ -5380,7 +5444,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; - MFI->setStackProtectorIndex(FI); + MFI.setStackProtectorIndex(FI); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); @@ -5411,6 +5475,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::annotation: case Intrinsic::ptr_annotation: + case Intrinsic::invariant_group_barrier: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; @@ -5687,7 +5752,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineModuleInfo &MMI = MF.getMMI(); MCSymbol *BeginLabel = nullptr; if (EHPadBB) { @@ -5699,7 +5765,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { - MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. @@ -5740,13 +5806,13 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - if (MMI.hasEHFunclets()) { + if (MF.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), BeginLabel, EndLabel); } else { - MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } } @@ -5766,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5785,9 +5860,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. 
- Entry.Node = DAG.getRegister( - FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -5803,6 +5878,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; + // Disable tail calls if there is an swifterror argument. Targets have not + // been updated to support tail calls. + if (TLI.supportSwiftError() && SwiftErrorVal) + isTailCall = false; + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -5827,7 +5907,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. - FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } @@ -6035,6 +6115,49 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { return false; } +/// +/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to +/// to adjust the dst pointer by the size of the copied memory. +bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { + + // Verify argument count: void *mempcpy(void *, const void *, size_t) + if (I.getNumArgOperands() != 3) + return false; + + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + + unsigned DstAlign = DAG.InferPtrAlignment(Dst); + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + unsigned Align = std::min(DstAlign, SrcAlign); + if (Align == 0) // Alignment of one or both could not be inferred. + Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + + bool isVol = false; + SDLoc sdl = getCurSDLoc(); + + // In the mempcpy context we need to pass in a false value for isTailCall + // because the return pointer needs to be adjusted by the size of + // the copied memory. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + false, /*isTailCall=*/false, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + assert(MC.getNode() != nullptr && + "** memcpy should not be lowered as TailCall in mempcpy context **"); + DAG.setRoot(MC); + + // Check if Size needs to be truncated or extended. + Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType()); + + // Adjust return pointer to point just past the last dst byte. + SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(), + Dst, Size); + setValue(&I, DstPlusSize); + return true; +} + /// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an /// optimized form. If so, return true and lower it, otherwise return false /// and it will be lowered like a normal call. 
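[Editor's aside, not part of the patch] The new visitMemPCpyCall above leans on the identity that mempcpy(d, s, n) behaves like memcpy(d, s, n) but returns d + n, which is why the lowering emits a memcpy node and then adds the (possibly extended or truncated) size to the destination pointer. A tiny standalone illustration of that identity:

#include <cstring>
#include <iostream>

// mempcpy-equivalent: copy like memcpy, return a pointer one past the last
// byte written.
void *mempcpyEquivalent(void *Dst, const void *Src, std::size_t N) {
  std::memcpy(Dst, Src, N);
  return static_cast<char *>(Dst) + N;
}

int main() {
  char Buf[16] = {};
  char *End = static_cast<char *>(mempcpyEquivalent(Buf, "hello", 5));
  // End points just past the copied bytes, which is handy for appending.
  std::memcpy(End, " world", 7);
  std::cout << Buf << "\n";   // hello world
}

This is also why the memcpy node is emitted with isTailCall=false: the caller still needs the adjusted pointer after the copy returns.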
@@ -6191,7 +6314,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ComputeUsesVAFloatArgument(I, &MMI); + computeUsesVAFloatArgument(I, MMI); const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { @@ -6325,6 +6448,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::mempcpy: + if (visitMemPCpyCall(I)) + return; + break; case LibFunc::memchr: if (visitMemChrCall(I)) return; @@ -6395,6 +6522,19 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } + /// Whether or not this operand accesses memory + bool hasMemory(const TargetLowering &TLI) const { + // Indirect operand accesses access memory. + if (isIndirect) + return true; + + for (const auto &Code : Codes) + if (TLI.getConstraintType(Code) == TargetLowering::C_Memory) + return true; + + return false; + } + /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -6447,6 +6587,75 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; } // end anonymous namespace +/// Make sure that the output operand \p OpInfo and its corresponding input +/// operand \p MatchingOpInfo have compatible constraint types (otherwise error +/// out). +static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &MatchingOpInfo, + SelectionDAG &DAG) { + if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) + return; + + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, + MatchingOpInfo.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + MatchingOpInfo.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + // FIXME: error out in a more elegant fashion + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; +} + +/// Get a direct memory input to behave well as an indirect operand. +/// This may introduce stores, hence the need for a \p Chain. +/// \return The (possibly updated) chain. +static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, + SDISelAsmOperandInfo &OpInfo, + SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // If we don't have an indirect input, put it in the constpool if we can, + // otherwise spill it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. 
+ const Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); + return Chain; + } + + // Otherwise, create a stack slot and emit a store to it before the asm. + Type *Ty = OpVal->getType(); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(MF, SSFI)); + OpInfo.CallOperand = StackSlot; + + return Chain; +} + /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -6555,6 +6764,73 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Otherwise, we couldn't allocate enough registers for this. } +static unsigned +findMatchingInlineAsmOperand(unsigned OperandNo, + const std::vector<SDValue> &AsmNodeOperands) { + // Scan until we find the definition we already emitted of this operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; + for (; OperandNo; --OperandNo) { + // Advance to the next operand. + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && + "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1; + } + return CurOp; +} + +/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT +/// \return true if it has succeeded, false otherwise +static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs, + MVT RegVT, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0, e = NumRegs; i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + else + return false; + } + return true; +} + +class ExtraFlags { + unsigned Flags = 0; + +public: + explicit ExtraFlags(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + if (IA->hasSideEffects()) + Flags |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + Flags |= InlineAsm::Extra_IsAlignStack; + if (CS.isConvergent()) + Flags |= InlineAsm::Extra_IsConvergent; + Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + } + + void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) { + // Ideally, we would only check against memory constraints. However, the + // meaning of an Other constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for Other constraints as well. 
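[Editor's aside, not part of the patch] The new findMatchingInlineAsmOperand helper above walks the flattened INLINEASM operand list, where each logical operand is one flag word followed by getNumOperandRegisters(flag) register operands, so locating operand #K means skipping K such groups. A standalone model of that scan; the flag encoding below (register count in the low bits) is invented purely for the sketch:

#include <cstddef>
#include <iostream>
#include <vector>

unsigned numRegsFromFlag(unsigned Flag) { return Flag & 0xff; }

std::size_t findOperandStart(const std::vector<unsigned> &FlatOps,
                             unsigned OperandNo, std::size_t FirstOperand) {
  std::size_t Cur = FirstOperand;
  for (; OperandNo; --OperandNo)
    Cur += numRegsFromFlag(FlatOps[Cur]) + 1;  // skip flag word + its registers
  return Cur;
}

int main() {
  // Three groups of 2, 1, and 3 registers (flag words shown as just the count).
  std::vector<unsigned> Ops{2, 0, 0, 1, 0, 3, 0, 0, 0};
  std::cout << findOperandStart(Ops, 2, 0) << "\n";  // 5: start of the third group
}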
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + Flags |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + Flags |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + } + } + + unsigned get() const { return Flags; } +}; + /// visitInlineAsm - Handle a call to an InlineAsm object. /// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { @@ -6569,6 +6845,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { bool hasMemory = false; + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + ExtraFlags ExtraInfo(CS); + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { @@ -6578,14 +6857,25 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT OpVT = MVT::Other; // Compute the value type for each operand. - switch (OpInfo.Type) { - case InlineAsm::isOutput: - // Indirect outputs just consume an argument. - if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); - break; + if (OpInfo.Type == InlineAsm::isInput || + (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + + // Process the call argument. BasicBlocks are labels, currently appearing + // only in asm's. + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } + OpVT = + OpInfo + .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) + .getSimpleVT(); + } + + if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); @@ -6597,43 +6887,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; } - // If this is an input or an indirect output, process the call argument. - // BasicBlocks are labels, currently appearing only in asm's. - if (OpInfo.CallOperandVal) { - if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { - OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); - } else { - OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - } + OpInfo.ConstraintVT = OpVT; - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, - DAG.getDataLayout()).getSimpleVT(); - } + if (!hasMemory) + hasMemory = OpInfo.hasMemory(TLI); - OpInfo.ConstraintVT = OpVT; + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]? + auto TargetConstraint = TargetConstraints[i]; - // Indirect operand accesses access memory. 
- if (OpInfo.isIndirect) - hasMemory = true; - else { - for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); - if (CType == TargetLowering::C_Memory) { - hasMemory = true; - break; - } - } - } + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(TargetConstraint, SDValue()); + + ExtraInfo.update(TargetConstraint); } SDValue Chain, Flag; @@ -6656,24 +6924,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - - if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); - std::pair<unsigned, const TargetRegisterClass *> MatchRC = - TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass *> InputRC = - TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, - Input.ConstraintVT); - if ((OpInfo.ConstraintVT.isInteger() != - Input.ConstraintVT.isInteger()) || - (MatchRC.second != InputRC.second)) { - report_fatal_error("Unsupported asm: input constraint" - " with a matching output constraint of" - " incompatible type!"); - } - Input.ConstraintVT = OpInfo.ConstraintVT; - } + patchMatchingInput(OpInfo, Input, DAG); } // Compute the constraint code and ConstraintType to use. @@ -6691,37 +6942,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); - // Memory operands really want the address of the value. If we don't have - // an indirect input, put it in the constpool if we can, otherwise spill - // it to a stack slot. - // TODO: This isn't quite right. We need to handle these according to - // the addressing mode that the constraint wants. Also, this may take - // an additional register for the computation and we don't want that - // either. - - // If the operand is a float, integer, or vector constant, spill to a - // constant pool entry to get its address. - const Value *OpVal = OpInfo.CallOperandVal; - if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { - OpInfo.CallOperand = DAG.getConstantPool( - cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); - } else { - // Otherwise, create a stack slot and emit a store to it before the - // asm. - Type *Ty = OpVal->getType(); - auto &DL = DAG.getDataLayout(); - uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = DL.getPrefTypeAlignment(Ty); - MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = - DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore( - Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); - OpInfo.CallOperand = StackSlot; - } + // Memory operands really want the address of the value. + Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG); // There is no longer a Value* corresponding to this operand. 
OpInfo.CallOperandVal = nullptr; @@ -6736,7 +6958,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } - // Second pass - Loop over all of the operands, assigning virtual or physregs + // Third pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; @@ -6761,40 +6983,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore // bits as operand 3. - unsigned ExtraInfo = 0; - if (IA->hasSideEffects()) - ExtraInfo |= InlineAsm::Extra_HasSideEffects; - if (IA->isAlignStack()) - ExtraInfo |= InlineAsm::Extra_IsAlignStack; - if (CS.isConvergent()) - ExtraInfo |= InlineAsm::Extra_IsConvergent; - // Set the asm dialect. - ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - - // Determine if this InlineAsm MayLoad or MayStore based on the constraints. - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - - // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); - - // Ideally, we would only check against memory constraints. However, the - // meaning of an other constraint can be target-specific and we can't easily - // reason about it. Therefore, be conservative and set MayLoad/MayStore - // for other constriants as well. - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - OpInfo.ConstraintType == TargetLowering::C_Other) { - if (OpInfo.Type == InlineAsm::isInput) - ExtraInfo |= InlineAsm::Extra_MayLoad; - else if (OpInfo.Type == InlineAsm::isOutput) - ExtraInfo |= InlineAsm::Extra_MayStore; - else if (OpInfo.Type == InlineAsm::isClobber) - ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); - } - } - AsmNodeOperands.push_back(DAG.getTargetConstant( - ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6862,24 +7052,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; - if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + if (OpInfo.isMatchingInputConstraint()) { // If this is required to match an output register we have already set, // just use its register. - unsigned OperandNo = OpInfo.getMatchedOperand(); - - // Scan until we find the definition we already emitted of this operand. - // When we find it, create a RegsForValue operand. - unsigned CurOp = InlineAsm::Op_FirstOperand; - for (; OperandNo; --OperandNo) { - // Advance to the next operand. 
- unsigned OpFlag = - cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert((InlineAsm::isRegDefKind(OpFlag) || - InlineAsm::isRegDefEarlyClobberKind(OpFlag) || - InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); - CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; - } - + auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), + AsmNodeOperands); unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); if (InlineAsm::isRegDefKind(OpFlag) || @@ -6893,22 +7070,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - RegsForValue MatchedRegs; - MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); - MatchedRegs.RegVTs.push_back(RegVT); - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) - MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); - else { - emitInlineAsmError( - CS, "inline asm error: This value" - " type register class is not natively supported!"); - return; - } + SmallVector<unsigned, 4> Regs; + + if (!createVirtualRegs(Regs, + InlineAsm::getNumOperandRegisters(OpFlag), + RegVT, DAG)) { + emitInlineAsmError(CS, "inline asm error: This value type register " + "class is not natively supported!"); + return; } + + RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, @@ -7142,19 +7316,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, if (!Range) return Op; - Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue(); - if (!Lo->isNullValue()) + ConstantRange CR = getConstantRangeFromMetadata(*Range); + if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet()) + return Op; + + APInt Lo = CR.getUnsignedMin(); + if (!Lo.isMinValue()) return Op; - Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue(); - unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2(); + APInt Hi = CR.getUnsignedMax(); + unsigned Bits = Hi.getActiveBits(); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); SDLoc SL = getCurSDLoc(); - SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), - Op, DAG.getValueType(SmallVT)); + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, + DAG.getValueType(SmallVT)); unsigned NumVals = Op.getNode()->getNumValues(); if (NumVals == 1) return ZExt; @@ -7299,7 +7477,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { DAG.setRoot(Chain); // Inform the Frame Information that we have a stackmap in this function. - FuncInfo.MF->getFrameInfo()->setHasStackMap(); + FuncInfo.MF->getFrameInfo().setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. @@ -7450,7 +7628,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. 
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}

/// Returns an AttributeSet representing the attributes applied to the return
@@ -7498,7 +7676,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);

DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
@@ -7580,8 +7758,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setZExt();
if (Args[i].isSExt)
Flags.setSExt();
- if (Args[i].isInReg)
+ if (Args[i].isInReg) {
+ // If we are using vectorcall calling convention, a structure that is
+ // passed InReg is surely an HVA
+ if (CLI.CallConv == CallingConv::X86_VectorCall &&
+ isa<StructType>(FinalType)) {
+ // The first value of a structure is marked
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
Flags.setInReg();
+ }
if (Args[i].isSRet)
Flags.setSRet();
if (Args[i].isSwiftSelf)
@@ -7867,8 +8056,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setZExt();
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
Flags.setSExt();
- if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
+ // If we are using vectorcall calling convention, a structure that is
+ // passed InReg is surely an HVA
+ if (F.getCallingConv() == CallingConv::X86_VectorCall &&
+ isa<StructType>(I->getType())) {
+ // The first value of a structure is marked
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
Flags.setInReg();
+ }
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
@@ -7990,7 +8190,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {

// If this argument is unused then remember its value. It is used to generate
// debugging information.
- if (I->use_empty() && NumValues) {
+ bool isSwiftErrorArg =
+ TLI->supportSwiftError() &&
+ F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
+ if (I->use_empty() && NumValues && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&*I, InVals[i]);

// Also remember any frame index for use in FastISel.
@@ -8004,7 +8207,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);

- if (!I->use_empty()) {
+ // Even an apparent 'unused' swifterror argument needs to be returned. So
+ // we do generate a copy for it that can be used on return from the
+ // function.
+ if (!I->use_empty() || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
@@ -8040,12 +8246,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}

- // Update SwiftErrorMap.
- if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() &&
- F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) {
+ // Update the SwiftErrorVRegDefMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) - FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg; + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, + FuncInfo->SwiftErrorArg, Reg); } // If this argument is live outside of the entry block, insert a copy from @@ -8197,14 +8403,14 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { } bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - unsigned *TotalCases, unsigned First, - unsigned Last, - unsigned Density) { + const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last, + unsigned Density) const { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); - APInt LowCase = Clusters[First].Low->getValue(); - APInt HighCase = Clusters[Last].High->getValue(); + const APInt &LowCase = Clusters[First].Low->getValue(); + const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); // FIXME: A range of consecutive cases has 100% density, but only requires one @@ -8233,7 +8439,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI, TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } -bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, +bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, @@ -8252,12 +8458,12 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Prob += Clusters[I].Prob; - APInt Low = Clusters[I].Low->getValue(); - APInt High = Clusters[I].High->getValue(); + const APInt &Low = Clusters[I].Low->getValue(); + const APInt &High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; if (I != First) { // Fill the gap between this and the previous cluster. - APInt PreviousHigh = Clusters[I - 1].High->getValue(); + const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); assert(PreviousHigh.slt(Low)); uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; for (uint64_t J = 0; J < Gap; J++) @@ -8325,26 +8531,37 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, if (!areJTsAllowed(TLI, SI)) return; + const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); + const int64_t N = Clusters.size(); - const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; + const unsigned MaxJumpTableSize = + OptForSize || TLI.getMaximumJumpTableSize() == 0 + ? UINT_MAX : TLI.getMaximumJumpTableSize(); + + if (N < 2 || N < MinJumpTableEntries) + return; // TotalCases[i]: Total nbr of cases in Clusters[0..i]. 
SmallVector<unsigned, 8> TotalCases(N); - for (unsigned i = 0; i < N; ++i) { - APInt Hi = Clusters[i].High->getValue(); - APInt Lo = Clusters[i].Low->getValue(); + const APInt &Hi = Clusters[i].High->getValue(); + const APInt &Lo = Clusters[i].Low->getValue(); TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; if (i != 0) TotalCases[i] += TotalCases[i - 1]; } - unsigned MinDensity = JumpTableDensity; - if (DefaultMBB->getParent()->getFunction()->optForSize()) - MinDensity = OptsizeJumpTableDensity; - if (N >= MinJumpTableSize - && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { - // Cheap case: the whole range might be suitable for jump table. + const unsigned MinDensity = + OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; + + // Cheap case: the whole range may be suitable for jump table. + unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - + Clusters[0].Low->getValue()) + .getLimitedValue(UINT_MAX - 1) + 1; + if (JumpTableSize <= MaxJumpTableSize && + isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -8368,14 +8585,23 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, SmallVector<unsigned, 8> MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector<unsigned, 8> LastElement(N); - // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. - SmallVector<unsigned, 8> NumTables(N); + // PartitionsScore[i] is used to break ties when choosing between two + // partitionings resulting in the same number of partitions. + SmallVector<unsigned, 8> PartitionsScore(N); + // For PartitionsScore, a small number of comparisons is considered as good as + // a jump table and a single comparison is considered better than a jump + // table. + enum PartitionScores : unsigned { + NoTable = 0, + Table = 1, + FewCases = 1, + SingleCase = 2 + }; // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; - assert(MinJumpTableSize > 1); - NumTables[N - 1] = 0; + PartitionsScore[N - 1] = PartitionScores::SingleCase; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; i--) { @@ -8383,23 +8609,34 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; - NumTables[i] = NumTables[i + 1]; + PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase; // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { + JumpTableSize = (Clusters[j].High->getValue() - + Clusters[i].Low->getValue()) + .getLimitedValue(UINT_MAX - 1) + 1; + if (JumpTableSize <= MaxJumpTableSize && + isDense(Clusters, TotalCases, i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - bool IsTable = j - i + 1 >= MinJumpTableSize; - unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); - - // If this j leads to fewer partitions, or same number of partitions - // with more lookup tables, it is a better partitioning. + unsigned Score = j == N - 1 ? 
0 : PartitionsScore[j + 1]; + int64_t NumEntries = j - i + 1; + + if (NumEntries == 1) + Score += PartitionScores::SingleCase; + else if (NumEntries <= SmallNumberOfEntries) + Score += PartitionScores::FewCases; + else if (NumEntries >= MinJumpTableEntries) + Score += PartitionScores::Table; + + // If this leads to fewer partitions, or to the same number of + // partitions with better score, it is a better partitioning. if (NumPartitions < MinPartitions[i] || - (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { MinPartitions[i] = NumPartitions; LastElement[i] = j; - NumTables[i] = Tables; + PartitionsScore[i] = Score; } } } @@ -8414,7 +8651,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, unsigned NumClusters = Last - First + 1; CaseCluster JTCluster; - if (NumClusters >= MinJumpTableSize && + if (NumClusters >= MinJumpTableEntries && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { Clusters[DstIndex++] = JTCluster; } else { @@ -9107,7 +9344,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { WorkList.pop_back(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; - if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && + !DefaultMBB->getParent()->getFunction()->optForMinSize()) { // For optimized builds, lower large range as a balanced binary tree. splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index b9888ae..abde8a8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -305,12 +305,13 @@ private: }; /// Check whether a range of clusters is dense enough for a jump table. - bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, - unsigned First, unsigned Last, unsigned MinDensity); + bool isDense(const CaseClusterVector &Clusters, + const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last, unsigned MinDensity) const; /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. - bool buildJumpTable(CaseClusterVector &Clusters, unsigned First, + bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); @@ -652,8 +653,6 @@ public: return CurInst ? 
CurInst->getDebugLoc() : DebugLoc(); } - unsigned getSDNodeOrder() const { return SDNodeOrder; } - void CopyValueToVirtualRegister(const Value *V, unsigned Reg); void visit(const Instruction &I); @@ -875,8 +874,8 @@ private: void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); - void visitMaskedLoad(const CallInst &I); - void visitMaskedStore(const CallInst &I); + void visitMaskedLoad(const CallInst &I, bool IsExpanding = false); + void visitMaskedStore(const CallInst &I, bool IsCompressing = false); void visitMaskedGather(const CallInst &I); void visitMaskedScatter(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); @@ -885,6 +884,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemPCpyCall(const CallInst &I); bool visitMemChrCall(const CallInst &I); bool visitStrCpyCall(const CallInst &I, bool isStpcpy); bool visitStrCmpCall(const CallInst &I); @@ -941,6 +941,11 @@ private: /// Update the DAG and DAG builder with the relevant information after /// a new root node has been created which could be a tail call. void updateDAGForMaybeTailCall(SDValue MaybeTC); + + /// Return the appropriate SDDbgValue based on N. + SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable, + DIExpression *Expr, int64_t Offset, DebugLoc dl, + unsigned DbgSDNodeOrder); }; /// RegsForValue - This struct represents the registers (physical or virtual) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 93ac6d6..0faaad8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::JumpTable: return "JumpTable"; case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; @@ -120,7 +121,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 
0 : 1; unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); + return Intrinsic::getName((Intrinsic::ID)IID, None); else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) return TII->getName(IID); llvm_unreachable("Invalid intrinsic ID"); @@ -261,21 +262,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - // Control flow instructions case ISD::BR: return "br"; case ISD::BRIND: return "brind"; @@ -321,7 +307,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - + // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; @@ -424,9 +410,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle()) OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble()) OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; else { OS << "<APFloat("; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1d61657..64e6c22 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -426,6 +426,10 @@ static void SplitCriticalSideEffectEdges(Function &Fn) { } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + // If we already selected that function, we do not need to run SDISel. + if (mf.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected)) + return false; // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); @@ -594,16 +598,16 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there are any calls in this machine function. 
- MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineFrameInfo &MFI = MF->getFrameInfo(); for (const auto &MBB : *MF) { - if (MFI->hasCalls() && MF->hasInlineAsm()) + if (MFI.hasCalls() && MF->hasInlineAsm()) break; for (const auto &MI : MBB) { const MCInstrDesc &MCID = TII->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || MI.isStackAligningInlineAsm()) { - MFI->setHasCalls(true); + MFI.setHasCalls(true); } if (MI.isInlineAsm()) { MF->setHasInlineAsm(true); @@ -645,7 +649,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } if (TLI->hasCopyImplyingStackAdjustment(MF)) - MFI->setHasCopyImplyingStackAdjustment(true); + MFI.setHasCopyImplyingStackAdjustment(true); // Freeze the set of reserved registers now that MachineFrameInfo has been // set up. All the information required by getReservedRegs() should be @@ -721,9 +725,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } void SelectionDAGISel::CodeGenAndEmitDAG() { - std::string GroupName; - if (TimePassesIsEnabled) - GroupName = "Instruction Selection and Scheduling"; + StringRef GroupName = "sdag"; + StringRef GroupDescription = "Instruction Selection and Scheduling"; std::string BlockName; int BlockNumber = -1; (void)BlockNumber; @@ -751,7 +754,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. { - NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } @@ -765,7 +769,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { bool Changed; { - NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize_types", "Type Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } @@ -780,8 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-type-legalize mode. { - NamedRegionTimer T("DAG Combining after legalize types", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", + GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } @@ -791,13 +796,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } { - NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { { - NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } @@ -806,8 +813,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-type-legalize mode. 
{ - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", + GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } @@ -819,7 +826,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->viewGraph("legalize input for " + BlockName); { - NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize", "DAG Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->Legalize(); } @@ -831,7 +839,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { - NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } @@ -847,7 +856,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. { - NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("isel", "Instruction Selection", GroupName, + GroupDescription, TimePassesIsEnabled); DoInstructionSelection(); } @@ -860,8 +870,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); { - NamedRegionTimer T("Instruction Scheduling", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("sched", "Instruction Scheduling", GroupName, + GroupDescription, TimePassesIsEnabled); Scheduler->Run(CurDAG, FuncInfo->MBB); } @@ -872,7 +882,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // inserted into. MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; { - NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("emit", "Instruction Creation", GroupName, + GroupDescription, TimePassesIsEnabled); // FuncInfo->InsertPt is passed by reference and set to the end of the // scheduled instructions. @@ -886,8 +897,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Free the scheduler state. { - NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName, + GroupDescription, TimePassesIsEnabled); delete Scheduler; } @@ -1003,10 +1014,10 @@ bool SelectionDAGISel::PrepareEHLandingPad() { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF->getMMI().addLandingPad(MBB); + MCSymbol *Label = MF->addLandingPad(MBB); // Assign the call site to the landing pad's begin label. - MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); + MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) @@ -1153,14 +1164,22 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, return; FuncInfo->SwiftErrorVals.clear(); - FuncInfo->SwiftErrorMap.clear(); - FuncInfo->SwiftErrorWorklist.clear(); + FuncInfo->SwiftErrorVRegDefMap.clear(); + FuncInfo->SwiftErrorVRegUpwardsUse.clear(); + FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. 
+ bool HaveSeenSwiftErrorArg = false; for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); AI != AE; ++AI) - if (AI->hasSwiftErrorAttr()) + if (AI->hasSwiftErrorAttr()) { + assert(!HaveSeenSwiftErrorArg && + "Must have only one swifterror parameter"); + (void)HaveSeenSwiftErrorArg; // silence warning. + HaveSeenSwiftErrorArg = true; + FuncInfo->SwiftErrorArg = &*AI; FuncInfo->SwiftErrorVals.push_back(&*AI); + } for (const auto &LLVMBB : Fn) for (const auto &Inst : LLVMBB) { @@ -1170,95 +1189,152 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, } } -/// For each basic block, merge incoming swifterror values or simply propagate -/// them. The merged results will be saved in SwiftErrorMap. For predecessors -/// that are not yet visited, we create virtual registers to hold the swifterror -/// values and save them in SwiftErrorWorklist. -static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo, - const TargetLowering *TLI, - const TargetInstrInfo *TII, - const BasicBlock *LLVMBB, - SelectionDAGBuilder *SDB) { +static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const BasicBlock *LLVMBB, + SelectionDAGBuilder *SDB) { if (!TLI->supportSwiftError()) return; - // We should only do this when we have swifterror parameter or swifterror + // We only need to do this when we have swifterror parameter or swifterror // alloc. if (FuncInfo->SwiftErrorVals.empty()) return; - // At beginning of a basic block, insert PHI nodes or get the virtual - // register from the only predecessor, and update SwiftErrorMap; if one - // of the predecessors is not visited, update SwiftErrorWorklist. - // At end of a basic block, if a block is in SwiftErrorWorklist, insert copy - // to sync up the virtual register assignment. - - // Always create a virtual register for each swifterror value in entry block. - auto &DL = SDB->DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); if (pred_begin(LLVMBB) == pred_end(LLVMBB)) { - for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + auto &DL = FuncInfo->MF->getDataLayout(); + auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { + // We will always generate a copy from the argument. It is always used at + // least by the 'return' of the swifterror. + if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal) + continue; unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); // Assign Undef to Vreg. We construct MI directly to make sure it works // with FastISel. - BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), VReg); - FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(), + SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), + VReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg); } - return; } +} - if (auto *UniquePred = LLVMBB->getUniquePredecessor()) { - auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred]; - if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) { - // Update SwiftErrorWorklist with a new virtual register. 
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg); - // Propagate the information from the single predecessor. - FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); - } - return; - } - // Propagate the information from the single predecessor. - FuncInfo->SwiftErrorMap[FuncInfo->MBB] = - FuncInfo->SwiftErrorMap[UniquePredMBB]; +/// Propagate swifterror values through the machine function CFG. +static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { + auto *TLI = FuncInfo->TLI; + if (!TLI->supportSwiftError()) return; - } - // For the case of multiple predecessors, update SwiftErrorWorklist. - // Handle the case where we have two or more predecessors being the same. - for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); - PI != PE; ++PI) { - auto *PredMBB = FuncInfo->MBBMap[*PI]; - if (!FuncInfo->SwiftErrorMap.count(PredMBB) && - !FuncInfo->SwiftErrorWorklist.count(PredMBB)) { - for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - // When we actually visit the basic block PredMBB, we will materialize - // the virtual register assignment in copySwiftErrorsToFinalVRegs. - FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg); + // We only need to do this when we have swifterror parameter or swifterror + // alloc. + if (FuncInfo->SwiftErrorVals.empty()) + return; + + // For each machine basic block in reverse post order. + ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF); + for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator + It = RPOT.begin(), + E = RPOT.end(); + It != E; ++It) { + MachineBasicBlock *MBB = *It; + + // For each swifterror value in the function. + for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { + auto Key = std::make_pair(MBB, SwiftErrorVal); + auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); + auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key); + bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end(); + unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0; + bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end(); + assert(!(UpwardsUse && !DownwardDef) && + "We can't have an upwards use but no downwards def"); + + // If there is no upwards exposed use and an entry for the swifterror in + // the def map for this value we don't need to do anything: We already + // have a downward def for this basic block. + if (!UpwardsUse && DownwardDef) + continue; + + // Otherwise we either have an upwards exposed use vreg that we need to + // materialize or need to forward the downward def from predecessors. + + // Check whether we have a single vreg def from all predecessors. + // Otherwise we need a phi. + SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs; + SmallSet<const MachineBasicBlock*, 8> Visited; + for (auto *Pred : MBB->predecessors()) { + if (!Visited.insert(Pred).second) + continue; + VRegs.push_back(std::make_pair( + Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal))); + if (Pred != MBB) + continue; + // We have a self-edge. + // If there was no upwards use in this basic block there is now one: the + // phi needs to use it self. 
+ if (!UpwardsUse) { + UpwardsUse = true; + UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); + assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end()); + UUseVReg = UUseIt->second; + } + } + + // We need a phi node if we have more than one predecessor with different + // downward defs. + bool needPHI = + VRegs.size() >= 1 && + std::find_if( + VRegs.begin(), VRegs.end(), + [&](const std::pair<const MachineBasicBlock *, unsigned> &V) + -> bool { return V.second != VRegs[0].second; }) != + VRegs.end(); + + // If there is no upwards exposed used and we don't need a phi just + // forward the swifterror vreg from the predecessor(s). + if (!UpwardsUse && !needPHI) { + assert(!VRegs.empty() && + "No predecessors? The entry block should bail out earlier"); + // Just forward the swifterror vreg from the predecessor(s). + FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second); + continue; } - } - } - // For the case of multiple predecessors, create a virtual register for - // each swifterror value and generate Phi node. - for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); - - MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB, - FuncInfo->MBB->begin(), SDB->getCurDebugLoc(), - TII->get(TargetOpcode::PHI), VReg); - for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); - PI != PE; ++PI) { - auto *PredMBB = FuncInfo->MBBMap[*PI]; - unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ? - FuncInfo->SwiftErrorMap[PredMBB][I] : - FuncInfo->SwiftErrorWorklist[PredMBB][I]; - SwiftErrorPHI.addReg(SwiftErrorReg) - .addMBB(PredMBB); + auto DLoc = isa<Instruction>(SwiftErrorVal) + ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc() + : DebugLoc(); + const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); + + // If we don't need a phi create a copy to the upward exposed vreg. + if (!needPHI) { + assert(UpwardsUse); + unsigned DestReg = UUseVReg; + BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), + DestReg) + .addReg(VRegs[0].second); + continue; + } + + // We need a phi: if there is an upwards exposed use we already have a + // destination virtual register number otherwise we generate a new one. + auto &DL = FuncInfo->MF->getDataLayout(); + auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + unsigned PHIVReg = + UpwardsUse ? UUseVReg + : FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + MachineInstrBuilder SwiftErrorPHI = + BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, + TII->get(TargetOpcode::PHI), PHIVReg); + for (auto BBRegPair : VRegs) { + SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first); + } + + // We did not have a definition in this block before: store the phi's vreg + // as this block downward exposed def. + if (!UpwardsUse) + FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg); } } } @@ -1309,7 +1385,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FuncInfo->MBB) continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); - mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB); + createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB); // Setup an EH landing-pad block. 
FuncInfo->ExceptionPointerVirtReg = 0; @@ -1486,6 +1562,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->PHINodesToUpdate.clear(); } + propagateSwiftErrorVRegs(FuncInfo); + delete FastIS; SDB->clearDanglingDebugInfo(); SDB->SPDescriptor.resetPerFunctionState(); @@ -2170,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { /// to use the new results. void SelectionDAGISel::UpdateChains( SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { + SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; // Now that all the normal results are replaced, we replace the chain and @@ -2182,6 +2260,11 @@ void SelectionDAGISel::UpdateChains( // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; + // If ChainNode is null, it's because we replaced it on a previous + // iteration and we cleared it out of the map. Just skip it. + if (!ChainNode) + continue; + assert(ChainNode->getOpcode() != ISD::DELETED_NODE && "Deleted node left in chain"); @@ -2194,6 +2277,11 @@ void SelectionDAGISel::UpdateChains( if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + SelectionDAG::DAGNodeDeletedListener NDL( + *CurDAG, [&](SDNode *N, SDNode *E) { + std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, + static_cast<SDNode *>(nullptr)); + }); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. @@ -2694,14 +2782,15 @@ struct MatchScope { /// for this. class MatchStateUpdater : public SelectionDAG::DAGUpdateListener { - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes; - SmallVectorImpl<MatchScope> &MatchScopes; + SDNode **NodeToMatch; + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; public: - MatchStateUpdater(SelectionDAG &DAG, - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN, - SmallVectorImpl<MatchScope> &MS) : - SelectionDAG::DAGUpdateListener(DAG), - RecordedNodes(RN), MatchScopes(MS) { } + MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN, + SmallVectorImpl<MatchScope> &MS) + : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), + RecordedNodes(RN), MatchScopes(MS) {} void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or @@ -2711,6 +2800,9 @@ public: // update listener during matching a complex patterns. if (!E || E->isMachineOpcode()) return; + // Check if NodeToMatch was updated. + if (N == *NodeToMatch) + *NodeToMatch = E; // Performing linear search here does not matter because we almost never // run this code. You'd have to have a CSE during complex pattern // matching. @@ -3003,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // consistent. 
std::unique_ptr<MatchStateUpdater> MSU; if (ComplexPatternFuncMutatesDAG()) - MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, MatchScopes)); if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, @@ -3388,7 +3480,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, SDNode *E) { auto &Chain = ChainNodesMatched; - assert((!E || llvm::find(Chain, N) == Chain.end()) && + assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); }); @@ -3487,7 +3579,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, NodeToMatch->getValueType(i) == MVT::iPTR || Res.getValueType() == MVT::iPTR || NodeToMatch->getValueType(i).getSizeInBits() == - Res.getValueType().getSizeInBits()) && + Res.getValueSizeInBits()) && "invalid replacement"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } @@ -3579,7 +3671,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { unsigned iid = cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); if (iid < Intrinsic::num_intrinsics) - Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None); else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) Msg << "target intrinsic %" << TII->getName(iid); else diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 90aaba2..d27e245 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -55,7 +55,8 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in sync and // the clear patterns of a SelectionDAGBuilder have no relation to - // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false. + // FunctionLoweringInfo. Also need to ensure used bits get cleared. + AllocatedStackSlots.clear(); AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); } @@ -70,7 +71,7 @@ SDValue StatepointLoweringState::allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder) { NumSlotsAllocatedForStatepoints++; - auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); unsigned SpillSize = ValueType.getSizeInBits() / 8; assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); @@ -82,16 +83,16 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, const size_t NumSlots = AllocatedStackSlots.size(); assert(NextSlotToAllocate <= NumSlots && "Broken invariant"); - // The stack slots in StatepointStackSlots beyond the first NumSlots were - // added in this instance of StatepointLoweringState, and cannot be re-used. 
- assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() && + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && "Broken invariant"); for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { if (!AllocatedStackSlots.test(NextSlotToAllocate)) { const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; - if (MFI->getObjectSize(FI) == SpillSize) { + if (MFI.getObjectSize(FI) == SpillSize) { AllocatedStackSlots.set(NextSlotToAllocate); + // TODO: Is ValueType the right thing to use here? return Builder.DAG.getFrameIndex(FI, ValueType); } } @@ -101,9 +102,13 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); - MFI->markAsStatepointSpillSlotObjectIndex(FI); + MFI.markAsStatepointSpillSlotObjectIndex(FI); Builder.FuncInfo.StatepointStackSlots.push_back(FI); + AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true); + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); StatepointMaxSlotsRequired = std::max<unsigned long>( StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); @@ -350,9 +355,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // vary since we spill vectors of pointers too). At some point we // can consider allowing spills of smaller values to larger slots // (i.e. change the '==' in the assert below to a '>='). - auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - assert((MFI->getObjectSize(Index) * 8) == - Incoming.getValueType().getSizeInBits() && + MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); #endif @@ -370,7 +374,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, /// Lower a single value incoming to a statepoint node. This value can be /// either a deopt value or a gc value, the handling is the same. We special /// case constants and allocas, then fall back to spilling if required. -static void lowerIncomingStatepointValue(SDValue Incoming, +static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { SDValue Chain = Builder.getRoot(); @@ -389,6 +393,14 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // relocate the address of the alloca itself?) Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), Incoming.getValueType())); + } else if (LiveInOnly) { + // If this value is live in (not live-on-return, or live-through), we can + // treat it the same way patchpoint treats it's "live in" values. We'll + // end up folding some of these into stack references, but they'll be + // handled by the register allocator. Note that we do not have the notion + // of a late use so these values might be placed in registers which are + // clobbered by the call. This is fine for live-in. + Ops.push_back(Incoming); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. 
We currently do not support @@ -439,19 +451,38 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, "non gc managed derived pointer found in statepoint"); } } + assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); } else { assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); } #endif + // Figure out what lowering strategy we're going to use for each part + // Note: Is is conservatively correct to lower both "live-in" and "live-out" + // as "live-through". A "live-through" variable is one which is "live-in", + // "live-out", and live throughout the lifetime of the call (i.e. we can find + // it from any PC within the transitive callee of the statepoint). In + // particular, if the callee spills callee preserved registers we may not + // be able to find a value placed in that register during the call. This is + // fine for live-out, but not for live-through. If we were willing to make + // assumptions about the code generator producing the callee, we could + // potentially allow live-through values in callee saved registers. + const bool LiveInDeopt = + SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; + + auto isGCValue =[&](const Value *V) { + return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); + }; + // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. for (const Value *V : SI.DeoptState) { - reservePreviousStackSlotForValue(V, Builder); + if (!LiveInDeopt || isGCValue(V)) + reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < SI.Bases.size(); ++i) { reservePreviousStackSlotForValue(SI.Bases[i], Builder); @@ -468,7 +499,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // what type of values are contained within. for (const Value *V : SI.DeoptState) { SDValue Incoming = Builder.getValue(V); - lowerIncomingStatepointValue(Incoming, Ops, Builder); + const bool LiveInValue = LiveInDeopt && !isGCValue(V); + lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder); } // Finally, go ahead and lower all the gc arguments. There's no prefixed @@ -478,10 +510,12 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // (base[0], ptr[0], base[1], ptr[1], ...) for (unsigned i = 0; i < SI.Bases.size(); ++i) { const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); + lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, + Ops, Builder); const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); + lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, + Ops, Builder); } // If there are any explicit spill slots passed to the statepoint, record @@ -889,7 +923,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { #ifndef NDEBUG // Consistency check - // We skip this check for relocates not in the same basic block as thier + // We skip this check for relocates not in the same basic block as their // statepoint. 
It would be too expensive to preserve validation info through // different basic blocks. if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 806646f..690f0d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -216,7 +216,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : - (VT == MVT::f128) ? RTLIB::UO_F64 : RTLIB::UO_PPCF128; + (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : (VT == MVT::f64) ? RTLIB::OEQ_F64 : (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; @@ -418,6 +418,58 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, return false; } +bool +TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, + unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op = User->getOperand(OpIdx); + APInt KnownZero, KnownOne; + + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, + *this, 0, true)) + return false; + + + // Old will not always be the same as Op. For example: + // + // Demanded = 0xffffff + // Op = i64 truncate (i32 and x, 0xffffff) + // In this case simplify demand bits will want to replace the 'and' node + // with the value 'x', which will give us: + // Old = i32 and x, 0xffffff + // New = x + if (Old.hasOneUse()) { + // For the one use case, we just commit the change. + DCI.CommitTargetLoweringOpt(*this); + return true; + } + + // If Old has more than one use then it must be Op, because the + // AssumeSingleUse flag is not propogated to recursive calls of + // SimplifyDemanded bits, so the only node with multiple use that + // it will attempt to combine will be opt. + assert(Old == Op); + + SmallVector <SDValue, 4> NewOps; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + if (i == OpIdx) { + NewOps.push_back(New); + continue; + } + NewOps.push_back(User->getOperand(i)); + } + DAG.UpdateNodeOperands(User, NewOps); + // Op has less users now, so we may be able to perform additional combines + // with it. + DCI.AddToWorklist(Op.getNode()); + // User's operands have been updated, so we may be able to do new combines + // with it. + DCI.AddToWorklist(User); + return true; +} + /// Look at Op. At this point, we know that only the DemandedMask bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -430,9 +482,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, - unsigned Depth) const { + unsigned Depth, + bool AssumeSingleUse) const { unsigned BitWidth = DemandedMask.getBitWidth(); - assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && + assert(Op.getScalarValueSizeInBits() == BitWidth && "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; SDLoc dl(Op); @@ -442,7 +495,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero = KnownOne = APInt(BitWidth, 0); // Other users may use these bits. 
- if (!Op.getNode()->hasOneUse()) { + if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. @@ -468,22 +521,63 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. + case ISD::BUILD_VECTOR: + // Collect the known bits that are shared by every constant vector element. + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (SDValue SrcOp : Op->ops()) { + if (!isa<ConstantSDNode>(SrcOp)) { + // We can only handle all constant values - bail out with no known bits. + KnownZero = KnownOne = APInt(BitWidth, 0); + return false; + } + KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue(); + KnownZero2 = ~KnownOne2; + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (KnownOne2.getBitWidth() != BitWidth) { + assert(KnownOne2.getBitWidth() > BitWidth && + KnownZero2.getBitWidth() > BitWidth && + "Expected BUILD_VECTOR implicit truncation"); + KnownOne2 = KnownOne2.trunc(BitWidth); + KnownZero2 = KnownZero2.trunc(BitWidth); + } + + // Known bits are the values that are shared by every element. + // TODO: support per-element known bits. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without // using the bits from the RHS. Below, we use knowledge about the RHS to // simplify the LHS, here we're using information from the LHS to simplify // the RHS. if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + SDValue Op0 = Op.getOperand(0); APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(0)); + return TLO.CombineTo(Op, Op0); + // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) return true; + + // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its + // constant, but if this 'and' is only clearing bits that were just set by + // the xor, then this 'and' can be eliminated by shrinking the mask of + // the xor. For example, for a 32-bit X: + // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 + if (isBitwiseNot(Op0) && Op0.hasOneUse() && + LHSOne == ~RHSC->getAPIntValue()) { + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), + Op0.getOperand(0), Op.getOperand(1)); + return TLO.CombineTo(Op, Xor); + } } if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, @@ -599,10 +693,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the RHS is a constant, see if we can simplify it. // for XOR, we prefer to force bits to 1 if they will make a -1. - // if we can't force bits, try to shrink constant + // If we can't force bits, try to shrink the constant. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt Expanded = C->getAPIntValue() | (~NewMask); - // if we can expand it to have all bits set, do it + // If we can expand it to have all bits set, do it. 
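The new "and of a bitwise-not" case above rests on an ordinary integer identity: when the AND mask only clears bits that the XOR with -1 just set, the AND can be folded into the XOR. A minimal check of the concrete pattern quoted in the comment, assuming a 32-bit unsigned x (plain C++, not SelectionDAG nodes):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Tests[] = {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t X : Tests) {
    // and (xor (srl X, 31), -1), 1  -->  xor (srl X, 31), 1
    uint32_t Original = (~(X >> 31)) & 1u;
    uint32_t Folded   = (X >> 31) ^ 1u;
    assert(Original == Folded);
  }
  return 0;
}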
if (Expanded.isAllOnesValue()) { if (Expanded != C->getAPIntValue()) { EVT VT = Op.getValueType(); @@ -610,7 +704,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getConstant(Expanded, dl, VT)); return TLO.CombineTo(Op, New); } - // if it already has all the bits set, nothing to change + // If it already has all the bits set, nothing to change // but don't shrink either! } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { return true; @@ -823,7 +917,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); + InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -866,9 +960,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. if (MsbMask == NewMask) { - unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + unsigned ShAmt = ExVT.getScalarSizeInBits(); SDValue InOp = Op.getOperand(0); - unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + unsigned VTBits = Op->getValueType(0).getScalarSizeInBits(); bool AlreadySignExtended = TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; // However if the input is already sign extended we expect the sign @@ -892,17 +986,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - ExVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - ExVT.getScalarType().getSizeInBits()) & + ExVT.getScalarSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -919,8 +1013,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) - return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( + Op.getOperand(0), dl, ExVT.getScalarType())); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -957,8 +1051,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ZERO_EXTEND: { - unsigned OperandBitWidth = - Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); // If none of the top bits are demanded, convert this into an any_extend. 
@@ -980,7 +1073,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; @@ -1020,8 +1113,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ANY_EXTEND: { - unsigned OperandBitWidth = - Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -1034,8 +1126,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::TRUNCATE: { // Simplify the input, using demanded bit information, and compute the known // zero/one bits live out. - unsigned OperandBitWidth = - Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt TruncMask = NewMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -1109,7 +1200,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!TLO.LegalOperations() && !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + NewMask == APInt::getSignBit(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); @@ -1120,10 +1211,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); - unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + unsigned OpVTSizeInBits = Op.getValueSizeInBits(); if (!OpVTLegal && OpVTSizeInBits > 32) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); - unsigned ShVal = Op.getValueType().getSizeInBits()-1; + unsigned ShVal = Op.getValueSizeInBits() - 1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), @@ -1139,16 +1230,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMask.countLeadingZeros()); if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1)) - return true; - if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1)) - return true; - // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + KnownOne2, TLO, Depth+1) || + SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1) || + // See if the operation should be performed at a smaller bit width. + TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. 
We can no longer guarantee that we + // won't wrap after simplification. + SDNodeFlags NewFlags = *Flags; + NewFlags.setNoSignedWrap(false); + NewFlags.setNoUnsignedWrap(false); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1), + &NewFlags); + return TLO.CombineTo(Op, NewOp); + } return true; + } + LLVM_FALLTHROUGH; } - // FALL THROUGH default: // Just use computeKnownBits to compute output bits. TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); @@ -1214,11 +1316,11 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!BV) return false; - BitVector UndefElements; - CN = BV->getConstantSplatNode(&UndefElements); - // Only interested in constant splats, and we don't try to handle undef - // elements in identifying boolean constants. - if (!CN || UndefElements.none()) + // Only interested in constant splats, we don't care about undef + // elements in identifying boolean constants and getConstantSplatNode + // returns NULL if all ops are undef; + CN = BV->getConstantSplatNode(); + if (!CN) return false; } @@ -1254,11 +1356,11 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!BV) return false; - BitVector UndefElements; - CN = BV->getConstantSplatNode(&UndefElements); - // Only interested in constant splats, and we don't try to handle undef - // elements in identifying boolean constants. - if (!CN || UndefElements.none()) + // Only interested in constant splats, we don't care about undef + // elements in identifying boolean constants and getConstantSplatNode + // returns NULL if all ops are undef; + CN = BV->getConstantSplatNode(); + if (!CN) return false; } @@ -1390,7 +1492,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + ShAmt == Log2_32(N0.getValueSizeInBits())) { if ((C1 == 0) == (Cond == ISD::SETEQ)) { // (srl (ctlz x), 5) == 0 -> X != 0 // (srl (ctlz x), 5) != 1 -> X != 0 @@ -1412,8 +1514,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, CTPOP = N0.getOperand(0); if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && - (N0 == CTPOP || N0.getValueType().getSizeInBits() > - Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + (N0 == CTPOP || + N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) { EVT CTVT = CTPOP.getValueType(); SDValue CTOp = CTPOP.getOperand(0); @@ -1478,6 +1580,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); + if (MinBits == 1 && C1 == 1) + // Invert the condition. + return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } @@ -1530,7 +1636,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; if (!Lod->isVolatile() && Lod->isUnindexed()) { - unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... 
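Both the (srl (ctlz x), 5) == 0 fold above and the lowerCmpEqZeroToCtlzSrl helper added later in this diff lean on the same fact: for a 32-bit value, the leading-zero count reaches 32 only when the value is zero, so shifting it right by 5 yields exactly the x == 0 predicate. A small check, assuming the GCC/Clang __builtin_clz builtin (which is undefined at zero, hence the wrapper):

#include <cassert>
#include <cstdint>

// ctlz with defined behaviour at zero (the builtin alone is undefined there).
static unsigned clz32(uint32_t X) { return X ? __builtin_clz(X) : 32; }

int main() {
  const uint32_t Tests[] = {0u, 1u, 2u, 0x80000000u, 0xffffffffu};
  for (uint32_t X : Tests) {
    // (srl (ctlz x), 5) == 0  <->  x != 0: only x == 0 pushes ctlz to 32.
    assert(((clz32(X) >> 5) == 0) == (X != 0));
    // Equivalently, (srl (ctlz x), 5) is the 0/1 value of (x == 0), which is
    // what the later CTLZ+SRL lowering materializes.
    assert((clz32(X) >> 5) == (X == 0 ? 1u : 0u));
  }
  return 0;
}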
@@ -1577,7 +1683,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is a ZERO_EXTEND, perform the comparison on the input. if (N0.getOpcode() == ISD::ZERO_EXTEND) { - unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + unsigned InSize = N0.getOperand(0).getValueSizeInBits(); // If the comparison constant has bits in the upper part, the // zero-extended value could never match. @@ -2297,7 +2403,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Op); return; } - // fall through + LLVM_FALLTHROUGH; case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 's': { // Relocatable Constant @@ -2946,7 +3052,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else - return SDValue(); // No mulhu or equvialent + return SDValue(); // No mulhu or equivalent Created->push_back(Q.getNode()); @@ -2987,108 +3093,190 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { // Legalization Utilities //===----------------------------------------------------------------------===// -bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, - SelectionDAG &DAG, SDValue LL, SDValue LH, - SDValue RL, SDValue RH) const { - EVT VT = N->getValueType(0); - SDLoc dl(N); - - bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); - bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); - bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); - bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); - if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { - unsigned OuterBitSize = VT.getSizeInBits(); - unsigned InnerBitSize = HiLoVT.getSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); - unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); - - // LL, LH, RL, and RH must be either all NULL or all set to a value. 
- assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || - (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); - - if (!LL.getNode() && !RL.getNode() && - isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { - LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); - RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); - } - - if (!LL.getNode()) - return false; +bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, + SDValue LHS, SDValue RHS, + SmallVectorImpl<SDValue> &Result, + EVT HiLoVT, SelectionDAG &DAG, + MulExpansionKind Kind, SDValue LL, + SDValue LH, SDValue RL, SDValue RH) const { + assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI || + Opcode == ISD::SMUL_LOHI); + + bool HasMULHS = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + + if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI) + return false; - APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); - if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && - DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { - // The inputs are both zero-extended. - if (HasUMUL_LOHI) { - // We can emit a umul_lohi. - Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, - RL); - Hi = SDValue(Lo.getNode(), 1); - return true; - } - if (HasMULHU) { - // We can emit a mulhu+mul. - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); - return true; - } + unsigned OuterBitSize = VT.getScalarSizeInBits(); + unsigned InnerBitSize = HiLoVT.getScalarSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(LHS); + unsigned RHSSB = DAG.ComputeNumSignBits(RHS); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT); + auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi, + bool Signed) -> bool { + if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) { + Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R); + Hi = SDValue(Lo.getNode(), 1); + return true; } - if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { - // The input values are both sign-extended. - if (HasSMUL_LOHI) { - // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, - RL); - Hi = SDValue(Lo.getNode(), 1); - return true; - } - if (HasMULHS) { - // We can emit a mulhs+mul. - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); - return true; - } + if ((Signed && HasMULHS) || (!Signed && HasMULHU)) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R); + Hi = DAG.getNode(Signed ? 
ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R); + return true; } + return false; + }; - if (!LH.getNode() && !RH.getNode() && - isOperationLegalOrCustom(ISD::SRL, VT) && - isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { - auto &DL = DAG.getDataLayout(); - unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL)); - LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); - LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); - RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); - RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); - } + SDValue Lo, Hi; - if (!LH.getNode()) - return false; + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS); + } + + if (!LL.getNode()) + return false; - if (HasUMUL_LOHI) { - // Lo,Hi = umul LHS, RHS. - SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); - Lo = UMulLOHI; - Hi = UMulLOHI.getValue(1); - RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(LHS, HighMask) && + DAG.MaskedValueIsZero(RHS, HighMask)) { + // The inputs are both zero-extended. + if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) { + Result.push_back(Lo); + Result.push_back(Hi); + if (Opcode != ISD::MUL) { + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + Result.push_back(Zero); + Result.push_back(Zero); + } return true; } - if (HasMULHU) { - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); - RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + } + + if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize && + RHSSB > InnerBitSize) { + // The input values are both sign-extended. + // TODO non-MUL case? + if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { + Result.push_back(Lo); + Result.push_back(Hi); return true; } } - return false; + + unsigned ShiftAmount = OuterBitSize - InnerBitSize; + EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) { + // FIXME getShiftAmountTy does not always return a sensible result when VT + // is an illegal type, and so the type may be too small to fit the shift + // amount. Override it with i32. The shift will have to be legalized. 
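At bottom, the rewritten expansion is the schoolbook decomposition of a wide multiply into the half-width partial products LL*RL, LL*RH, LH*RL and LH*RH, shifted into place and summed; the DAG code above additionally threads the carries through ADDC/ADDE. A standalone sketch of just that arithmetic, building a 32x32->64 product from 16-bit halves and checking it against the native multiply (illustrative only, not the DAG expansion itself):

#include <cassert>
#include <cstdint>

// Build the 64-bit product of two 32-bit values from 16x16->32 partial
// products, mirroring the LL/LH/RL/RH split used by expandMUL_LOHI.
static uint64_t mulFromHalves(uint32_t L, uint32_t R) {
  uint32_t LL = L & 0xffff, LH = L >> 16;
  uint32_t RL = R & 0xffff, RH = R >> 16;
  uint64_t Lo   = (uint64_t)LL * RL; // contributes to bits [0, 32)
  uint64_t Mid1 = (uint64_t)LL * RH; // contributes to bits [16, 48)
  uint64_t Mid2 = (uint64_t)LH * RL; // contributes to bits [16, 48)
  uint64_t Hi   = (uint64_t)LH * RH; // contributes to bits [32, 64)
  // The true sum equals L * R and fits in 64 bits, so no addition wraps.
  return Lo + (Mid1 << 16) + (Mid2 << 16) + (Hi << 32);
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 0xffffu, 0x10000u, 0x12345678u, 0xffffffffu};
  for (uint32_t A : Tests)
    for (uint32_t B : Tests)
      assert(mulFromHalves(A, B) == (uint64_t)A * B);
  return 0;
}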
+ ShiftAmountTy = MVT::i32; + } + SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false)) + return false; + + Result.push_back(Lo); + + if (Opcode == ISD::MUL) { + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + Result.push_back(Hi); + return true; + } + + // Compute the full width result. + auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + return DAG.getNode(ISD::OR, dl, VT, Lo, Hi); + }; + + SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi); + if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false)) + return false; + + // This is effectively the add part of a multiply-add of half-sized operands, + // so it cannot overflow. + Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); + + if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false)) + return false; + + Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next, + Merge(Lo, Hi)); + + SDValue Carry = Next.getValue(1); + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift); + + if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI)) + return false; + + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero, + Carry); + Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); + + if (Opcode == ISD::SMUL_LOHI) { + SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL)); + Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT); + + NextSub = DAG.getNode(ISD::SUB, dl, VT, Next, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL)); + Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT); + } + + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift); + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + return true; +} + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL, SDValue LH, SDValue RL, + SDValue RH) const { + SmallVector<SDValue, 2> Result; + bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N, + N->getOperand(0), N->getOperand(1), Result, HiLoVT, + DAG, Kind, LL, LH, RL, RH); + if (Ok) { + assert(Result.size() == 2); + Lo = Result[0]; + Hi = Result[1]; + } + return Ok; } bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, @@ -3190,7 +3378,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, } SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); - SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals); + SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); return DAG.getMergeValues({ Value, NewChain }, SL); } @@ -3518,6 +3706,81 @@ SDValue 
TargetLowering::expandUnalignedStore(StoreSDNode *ST, return Result; } +SDValue +TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, + const SDLoc &DL, EVT DataVT, + SelectionDAG &DAG, + bool IsCompressedMemory) const { + SDValue Increment; + EVT AddrVT = Addr.getValueType(); + EVT MaskVT = Mask.getValueType(); + assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() && + "Incompatible types of Data and Mask"); + if (IsCompressedMemory) { + // Incrementing the pointer according to number of '1's in the mask. + EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); + SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); + if (MaskIntVT.getSizeInBits() < 32) { + MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg); + MaskIntVT = MVT::i32; + } + + // Count '1's with POPCNT. + Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg); + Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT); + // Scale is an element size in bytes. + SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, + AddrVT); + Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); + } else + Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT); + + return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); +} + +static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, + SDValue Idx, + EVT VecVT, + const SDLoc &dl) { + if (isa<ConstantSDNode>(Idx)) + return Idx; + + EVT IdxVT = Idx.getValueType(); + unsigned NElts = VecVT.getVectorNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), + Log2_32(NElts)); + return DAG.getNode(ISD::AND, dl, IdxVT, Idx, + DAG.getConstant(Imm, dl, IdxVT)); + } + + return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, + DAG.getConstant(NElts - 1, dl, IdxVT)); +} + +SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, + SDValue VecPtr, EVT VecVT, + SDValue Index) const { + SDLoc dl(Index); + // Make sure the index type is big enough to compute in. + Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + + EVT EltVT = VecVT.getVectorElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); + + EVT IdxVT = Index.getValueType(); + + Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, + DAG.getConstant(EltSize, dl, IdxVT)); + return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr); +} + //===----------------------------------------------------------------------===// // Implementation of Emulated TLS Model //===----------------------------------------------------------------------===// @@ -3550,11 +3813,36 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. // At last for X86 targets, maybe good for other targets too? - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setAdjustsStack(true); // Is this only for X86 target? - MFI->setHasCalls(true); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setAdjustsStack(true); // Is this only for X86 target? 
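Two helpers introduced in this hunk reduce to simple pointer arithmetic: IncrementMemoryAddress advances the address by popcount(mask) elements for a compressing store (or by the whole vector width otherwise), and getVectorElementPointer clamps a dynamic index before scaling it by the element size. A plain-integer sketch of both follows; the names and parameters are illustrative rather than the LLVM signatures, and it assumes the GCC/Clang __builtin_popcount:

#include <cassert>
#include <cstdint>

// Address after a masked store: a compressed store only advances past the
// elements that were actually written, one per set mask bit.
static uint64_t nextAddress(uint64_t Addr, uint32_t Mask, unsigned EltBytes,
                            unsigned NumElts, bool Compressed) {
  uint64_t Elts = Compressed ? (unsigned)__builtin_popcount(Mask) : NumElts;
  return Addr + Elts * EltBytes;
}

// Clamp a dynamic element index into [0, NumElts) before computing the
// element address, so an out-of-range index cannot address past the vector.
static uint64_t elementAddress(uint64_t VecAddr, uint64_t Idx,
                               unsigned NumElts, unsigned EltBytes) {
  bool PowerOf2 = NumElts != 0 && (NumElts & (NumElts - 1)) == 0;
  uint64_t Clamped = PowerOf2 ? (Idx & (NumElts - 1))
                              : (Idx < NumElts - 1 ? Idx : NumElts - 1);
  return VecAddr + Clamped * EltBytes;
}

int main() {
  // A compressing store of a 4 x i32 vector under mask 0b0101 writes 2 elements.
  assert(nextAddress(0x1000, 0x5, /*EltBytes=*/4, /*NumElts=*/4, true) == 0x1008);
  // Index 7 into a 4-element vector is clamped to element 3.
  assert(elementAddress(0x2000, 7, 4, 4) == 0x200c);
  return 0;
}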
+ MFI.setHasCalls(true); assert((GA->getOffset() == 0) && "Emulated TLS must have zero offset in GlobalAddressSDNode"); return CallResult.first; } + +SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, + SelectionDAG &DAG) const { + assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node."); + if (!isCtlzFast()) + return SDValue(); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc dl(Op); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (C->isNullValue() && CC == ISD::SETEQ) { + EVT VT = Op.getOperand(0).getValueType(); + SDValue Zext = Op.getOperand(0); + if (VT.bitsLT(MVT::i32)) { + VT = MVT::i32; + Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); + } + unsigned Log2b = Log2_32(VT.getSizeInBits()); + SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); + SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, + DAG.getConstant(Log2b, dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); + } + } + return SDValue(); +} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 1efc440..ff7d205 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" using namespace llvm; @@ -81,121 +82,6 @@ ShadowStackGCLowering::ShadowStackGCLowering() initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); } -namespace { -/// EscapeEnumerator - This is a little algorithm to find all escape points -/// from a function so that "finally"-style code can be inserted. In addition -/// to finding the existing return and unwind instructions, it also (if -/// necessary) transforms any call instructions into invokes and sends them to -/// a landing pad. -/// -/// It's wrapped up in a state machine using the same transform C# uses for -/// 'yield return' enumerators, This transform allows it to be non-allocating. -class EscapeEnumerator { - Function &F; - const char *CleanupBBName; - - // State. - int State; - Function::iterator StateBB, StateE; - IRBuilder<> Builder; - -public: - EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} - - IRBuilder<> *Next() { - switch (State) { - default: - return nullptr; - - case 0: - StateBB = F.begin(); - StateE = F.end(); - State = 1; - - case 1: - // Find all 'return', 'resume', and 'unwind' instructions. - while (StateBB != StateE) { - BasicBlock *CurBB = &*StateBB++; - - // Branches and invokes do not escape, only unwind, resume, and return - // do. - TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) - continue; - - Builder.SetInsertPoint(TI); - return &Builder; - } - - State = 2; - - // Find all 'call' instructions. - SmallVector<Instruction *, 16> Calls; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE; - ++II) - if (CallInst *CI = dyn_cast<CallInst>(II)) - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->getIntrinsicID()) - Calls.push_back(CI); - - if (Calls.empty()) - return nullptr; - - // Create a cleanup block. 
- LLVMContext &C = F.getContext(); - BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = - StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); - if (!F.hasPersonalityFn()) { - Constant *PersFn = F.getParent()->getOrInsertFunction( - "__gcc_personality_v0", - FunctionType::get(Type::getInt32Ty(C), true)); - F.setPersonalityFn(PersFn); - } - LandingPadInst *LPad = - LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB); - LPad->setCleanup(true); - ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); - - // Transform the 'call' instructions into 'invoke's branching to the - // cleanup block. Go in reverse order to make prettier BB names. - SmallVector<Value *, 16> Args; - for (unsigned I = Calls.size(); I != 0;) { - CallInst *CI = cast<CallInst>(Calls[--I]); - - // Split the basic block containing the function call. - BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = CallBB->splitBasicBlock( - CI->getIterator(), CallBB->getName() + ".cont"); - - // Remove the unconditional branch inserted at the end of CallBB. - CallBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - Args.clear(); - CallSite CS(CI); - Args.append(CS.arg_begin(), CS.arg_end()); - - InvokeInst *II = - InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args, - CI->getName(), CallBB); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - CI->replaceAllUsesWith(II); - delete CI; - } - - Builder.SetInsertPoint(RI); - return &Builder; - } - } -}; -} - - Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index d361a6c..4837495 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -199,9 +199,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { - return "Shrink Wrapping analysis"; - } + StringRef getPassName() const override { return "Shrink Wrapping analysis"; } /// \brief Perform the shrink-wrapping analysis and update /// the MachineFrameInfo attached to \p MF with the results. @@ -256,8 +254,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, /// \brief Helper function to find the immediate (post) dominator. 
template <typename ListOfBBs, typename DominanceAnalysis> -MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, - DominanceAnalysis &Dom) { +static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, + DominanceAnalysis &Dom) { MachineBasicBlock *IDom = &Block; for (MachineBasicBlock *BB : BBs) { IDom = Dom.findNearestCommonDominator(IDom, BB); @@ -521,9 +519,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { << ' ' << Save->getName() << "\nRestore: " << Restore->getNumber() << ' ' << Restore->getName() << '\n'); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setSavePoint(Save); - MFI->setRestorePoint(Restore); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setSavePoint(Save); + MFI.setRestorePoint(Restore); ++NumCandidates; return false; } diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index ce01c5f..209bbe5 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -58,7 +58,7 @@ public: bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override {} - const char *getPassName() const override { + StringRef getPassName() const override { return "SJLJ Exception Handling preparation"; } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 07be24b..1c6a84e 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -381,9 +381,59 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const { } #endif +LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, + LiveInterval &LI) { + for (LiveInterval::SubRange &S : LI.subranges()) + if (S.LaneMask == LM) + return S; + llvm_unreachable("SubRange for this mask not found"); +} + +void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { + if (!LI.hasSubRanges()) { + LI.createDeadDef(VNI); + return; + } + + SlotIndex Def = VNI->def; + if (Original) { + // If we are transferring a def from the original interval, make sure + // to only update the subranges for which the original subranges had + // a def at this location. + for (LiveInterval::SubRange &S : LI.subranges()) { + auto &PS = getSubRangeForMask(S.LaneMask, Edit->getParent()); + VNInfo *PV = PS.getVNInfoAt(Def); + if (PV != nullptr && PV->def == Def) + S.createDeadDef(Def, LIS.getVNInfoAllocator()); + } + } else { + // This is a new def: either from rematerialization, or from an inserted + // copy. Since rematerialization can regenerate a definition of a sub- + // register, we need to check which subranges need to be updated. 
+ const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def); + assert(DefMI != nullptr); + LaneBitmask LM; + for (const MachineOperand &DefOp : DefMI->defs()) { + unsigned R = DefOp.getReg(); + if (R != LI.reg) + continue; + if (unsigned SR = DefOp.getSubReg()) + LM |= TRI.getSubRegIndexLaneMask(SR); + else { + LM = MRI.getMaxLaneMaskForVReg(R); + break; + } + } + for (LiveInterval::SubRange &S : LI.subranges()) + if ((S.LaneMask & LM).any()) + S.createDeadDef(Def, LIS.getVNInfoAllocator()); + } +} + VNInfo *SplitEditor::defValue(unsigned RegIdx, const VNInfo *ParentVNI, - SlotIndex Idx) { + SlotIndex Idx, + bool Original) { assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); @@ -392,28 +442,28 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); + bool Force = LI->hasSubRanges(); + ValueForcePair FP(Force ? nullptr : VNI, Force); // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator, bool> InsP = - Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), - ValueForcePair(VNI, false))); + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), FP)); - // This was the first time (RegIdx, ParentVNI) was mapped. - // Keep it as a simple def without any liveness. - if (InsP.second) + // This was the first time (RegIdx, ParentVNI) was mapped, and it is not + // forced. Keep it as a simple def without any liveness. + if (!Force && InsP.second) return VNI; // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { - SlotIndex Def = OldVNI->def; - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); - // No longer a simple mapping. Switch to a complex, non-forced mapping. - InsP.first->second = ValueForcePair(); + addDeadDef(*LI, OldVNI, Original); + + // No longer a simple mapping. Switch to a complex mapping. If the + // interval has subranges, make it a forced mapping. + InsP.first->second = ValueForcePair(nullptr, Force); } // This is a complex mapping, add liveness for VNI - SlotIndex Def = VNI->def; - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); - + addDeadDef(*LI, VNI, Original); return VNI; } @@ -431,9 +481,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. - SlotIndex Def = VNI->def; - LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); + addDeadDef(LIS.getInterval(Edit->get(RegIdx)), VNI, false); + // Mark as complex mapped, forced. 
VFP = ValueForcePair(nullptr, true); } @@ -455,13 +504,18 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, unsigned Original = VRM.getOriginal(Edit->get(RegIdx)); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); - LiveRangeEdit::Remat RM(ParentVNI); - RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); - if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); - ++NumRemats; - } else { + bool DidRemat = false; + if (OrigVNI) { + LiveRangeEdit::Remat RM(ParentVNI); + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); + if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); + ++NumRemats; + DidRemat = true; + } + } + if (!DidRemat) { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); @@ -472,7 +526,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, } // Define the value in Reg. - return defValue(RegIdx, ParentVNI, Def); + return defValue(RegIdx, ParentVNI, Def, false); } /// Create a new virtual register and live interval. @@ -621,7 +675,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { } VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, - MBB.SkipPHIsAndLabels(MBB.begin())); + MBB.SkipPHIsLabelsAndDebug(MBB.begin())); RegAssign.insert(Start, VNI->def, OpenIdx); DEBUG(dump()); return VNI->def; @@ -944,14 +998,15 @@ bool SplitEditor::transferValues() { } // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. - DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); - LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx + << '(' << PrintReg(Edit->get(RegIdx)) << ')'); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); - LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + LI.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; } @@ -975,7 +1030,7 @@ bool SplitEditor::transferValues() { // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { - VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? @@ -995,7 +1050,7 @@ bool SplitEditor::transferValues() { if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. @@ -1003,10 +1058,10 @@ bool SplitEditor::transferValues() { // This block needs a live-in value. The last block covered may not // be live-out. 
if (End < BlockEnd) - LRC.addLiveInBlock(LR, MDT[&*MBB], End); + LRC.addLiveInBlock(LI, MDT[&*MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.addLiveInBlock(LI, MDT[&*MBB]); LRC.setLiveOutValue(&*MBB, nullptr); } } @@ -1025,42 +1080,90 @@ bool SplitEditor::transferValues() { return Skipped; } +static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) { + const LiveRange::Segment *Seg = LR.getSegmentContaining(Def); + if (Seg == nullptr) + return true; + if (Seg->end != Def.getDeadSlot()) + return false; + // This is a dead PHI. Remove it. + LR.removeSegment(*Seg, true); + return true; +} + +void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, + LiveRange &LR, LaneBitmask LM, + ArrayRef<SlotIndex> Undefs) { + for (MachineBasicBlock *P : B.predecessors()) { + SlotIndex End = LIS.getMBBEndIdx(P); + SlotIndex LastUse = End.getPrevSlot(); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + LiveInterval &PLI = Edit->getParent(); + // Need the cast because the inputs to ?: would otherwise be deemed + // "incompatible": SubRange vs LiveInterval. + LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI) + : static_cast<LiveRange&>(PLI); + if (PSR.liveAt(LastUse)) + LRC.extend(LR, End, /*PhysReg=*/0, Undefs); + } +} + void SplitEditor::extendPHIKillRanges() { // Extend live ranges to be live-out for successor PHI values. - for (const VNInfo *PHIVNI : Edit->getParent().valnos) { - if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) - continue; - unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); - - // Check whether PHI is dead. - const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def); - assert(Segment != nullptr && "Missing segment for VNI"); - if (Segment->end == PHIVNI->def.getDeadSlot()) { - // This is a dead PHI. Remove it. - LR.removeSegment(*Segment, true); + + // Visit each PHI def slot in the parent live interval. If the def is dead, + // remove it. Otherwise, extend the live interval to reach the end indexes + // of all predecessor blocks. + + LiveInterval &ParentLI = Edit->getParent(); + for (const VNInfo *V : ParentLI.valnos) { + if (V->isUnused() || !V->isPHIDef()) continue; - } + unsigned RegIdx = RegAssign.lookup(V->def); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); LiveRangeCalc &LRC = getLRCalc(RegIdx); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI); - SlotIndex LastUse = End.getPrevSlot(); - // The predecessor may not have a live-out value. That is OK, like an - // undef PHI operand. 
- if (Edit->getParent().liveAt(LastUse)) { - assert(RegAssign.lookup(LastUse) == RegIdx && - "Different register assignment in phi predecessor"); - LRC.extend(LR, End); - } + MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); + if (!removeDeadSegment(V->def, LI)) + extendPHIRange(B, LRC, LI, LaneBitmask::getAll(), /*Undefs=*/{}); + } + + SmallVector<SlotIndex, 4> Undefs; + LiveRangeCalc SubLRC; + + for (LiveInterval::SubRange &PS : ParentLI.subranges()) { + for (const VNInfo *V : PS.valnos) { + if (V->isUnused() || !V->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(V->def); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI); + if (removeDeadSegment(V->def, S)) + continue; + + MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + Undefs.clear(); + LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes()); + extendPHIRange(B, SubLRC, S, PS.LaneMask, Undefs); } } } /// rewriteAssigned - Rewrite all uses of Edit->getReg(). void SplitEditor::rewriteAssigned(bool ExtendRanges) { + struct ExtPoint { + ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N) + : MO(O), RegIdx(R), Next(N) {} + MachineOperand MO; + unsigned RegIdx; + SlotIndex Next; + }; + + SmallVector<ExtPoint,4> ExtPoints; + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = *RI; @@ -1082,8 +1185,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); - MO.setReg(LI->reg); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + MO.setReg(LI.reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1095,7 +1198,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (MO.isDef()) { if (!MO.getSubReg() && !MO.isEarlyClobber()) continue; - // We may wan't to extend a live range for a partial redef, or for a use + // We may want to extend a live range for a partial redef, or for a use // tied to an early clobber. Idx = Idx.getPrevSlot(); if (!Edit->getParent().liveAt(Idx)) @@ -1103,7 +1206,53 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); + SlotIndex Next = Idx.getNextSlot(); + if (LI.hasSubRanges()) { + // We have to delay extending subranges until we have seen all operands + // defining the register. This is because a <def,read-undef> operand + // will create an "undef" point, and we cannot extend any subranges + // until all of them have been accounted for. + if (MO.isUse()) + ExtPoints.push_back(ExtPoint(MO, RegIdx, Next)); + } else { + LiveRangeCalc &LRC = getLRCalc(RegIdx); + LRC.extend(LI, Next, 0, ArrayRef<SlotIndex>()); + } + } + + for (ExtPoint &EP : ExtPoints) { + LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx)); + assert(LI.hasSubRanges()); + + LiveRangeCalc SubLRC; + unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); + LaneBitmask LM = Sub != 0 ? 
TRI.getSubRegIndexLaneMask(Sub) + : MRI.getMaxLaneMaskForVReg(Reg); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LM).none()) + continue; + // The problem here can be that the new register may have been created + // for a partially defined original register. For example: + // %vreg827:subreg_hireg<def,read-undef> = ... + // ... + // %vreg828<def> = COPY %vreg827 + if (S.empty()) + continue; + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + SmallVector<SlotIndex, 4> Undefs; + LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes()); + SubLRC.extend(S, EP.Next, 0, Undefs); + } + } + + for (unsigned R : *Edit) { + LiveInterval &LI = LIS.getInterval(R); + if (!LI.hasSubRanges()) + continue; + LI.clear(); + LI.removeEmptySubRanges(); + LIS.constructMainRangeFromSubranges(LI); } } @@ -1146,7 +1295,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - defValue(RegIdx, ParentVNI, ParentVNI->def); + defValue(RegIdx, ParentVNI, ParentVNI->def, true); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. @@ -1182,8 +1331,9 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); + for (unsigned Reg : *Edit) { + LiveInterval &LI = LIS.getInterval(Reg); + LI.removeEmptySubRanges(); LI.RenumberValues(); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index a968494..a75738a 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -325,12 +325,30 @@ private: return LRCalc[SpillMode != SM_Partition && RegIdx != 0]; } + /// Find a subrange corresponding to the lane mask @p LM in the live + /// interval @p LI. The interval @p LI is assumed to contain such a subrange. + /// This function is used to find corresponding subranges between the + /// original interval and the new intervals. + LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI); + + /// Add a segment to the interval LI for the value number VNI. If LI has + /// subranges, corresponding segments will be added to them as well, but + /// with newly created value numbers. If Original is true, dead def will + /// only be added a subrange of LI if the corresponding subrange of the + /// original interval has a def at this index. Otherwise, all subranges + /// of LI will be updated. + void addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original); + /// defValue - define a value in RegIdx from ParentVNI at Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within /// ParentVNI's live range in ParentLI. The new value is added to the value - /// map. + /// map. The value being defined may either come from rematerialization + /// (or an inserted copy), or it may be coming from the original interval. + /// The parameter Original should be true in the latter case, otherwise + /// it should be false. /// Return the new LI value. 
- VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); + VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx, + bool Original); /// forceRecompute - Force the live range of ParentVNI in RegIdx to be /// recomputed by LiveRangeCalc::extend regardless of the number of defs. @@ -368,6 +386,15 @@ private: /// Return true if any ranges were skipped. bool transferValues(); + /// Live range @p LR corresponding to the lane Mask @p LM has a live + /// PHI def at the beginning of block @p B. Extend the range @p LR of + /// all predecessor values that reach this def. If @p LR is a subrange, + /// the array @p Undefs is the set of all locations where it is undefined + /// via <def,read-undef> in other subranges for the same register. + void extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, + LiveRange &LR, LaneBitmask LM, + ArrayRef<SlotIndex> Undefs); + /// extendPHIKillRanges - Extend the ranges of all values killed by original /// parent PHIDefs. void extendPHIKillRanges(); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 87cd470..89c4b57 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -778,10 +778,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { unsigned FixedInstr = 0; unsigned FixedMemOp = 0; unsigned FixedDbg = 0; - MachineModuleInfo *MMI = &MF->getMMI(); // Remap debug information that refers to stack slots. - for (auto &VI : MMI->getVariableDbgInfo()) { + for (auto &VI : MF->getVariableDbgInfo()) { if (!VI.Var) continue; if (SlotRemap.count(VI.Slot)) { @@ -980,7 +979,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); MF = &Func; - MFI = MF->getFrameInfo(); + MFI = &MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); SP = &getAnalysis<StackProtector>(); BlockLiveness.clear(); diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 87e4eb6..a5ef7c8 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" @@ -64,7 +63,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } /// \brief Calculate the liveness information for the given machine function. @@ -113,7 +112,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. - if (!MF.getFrameInfo()->hasPatchPoint()) { + if (!MF.getFrameInfo().hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } @@ -126,7 +125,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { // For all basic blocks in the function. for (auto &MBB : MF) { DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); - LiveRegs.init(TRI); + LiveRegs.init(*TRI); // FIXME: This should probably be addLiveOuts(). 
LiveRegs.addLiveOutsNoPristines(MBB); bool HasStackMap = false; diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index d91bb80..9b7dd400 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -30,16 +30,20 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" static cl::opt<int> StackMapVersion( - "stackmap-version", cl::init(1), - cl::desc("Specify the stackmap encoding version (default = 1)")); + "stackmap-version", cl::init(2), + cl::desc("Specify the stackmap encoding version (default = 2)")); const char *StackMaps::WSMP = "Stack Maps: "; +StackMapOpers::StackMapOpers(const MachineInstr *MI) + : MI(MI) { + assert(getVarIdx() <= MI->getNumOperands() && + "invalid stackmap definition"); +} + PatchPointOpers::PatchPointOpers(const MachineInstr *MI) : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == - CallingConv::AnyReg) { + !MI->getOperand(0).isImplicit()) { #ifndef NDEBUG unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && @@ -70,7 +74,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { } StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { - if (StackMapVersion != 1) + if (StackMapVersion != 2) llvm_unreachable("Unsupported stackmap version!"); } @@ -272,8 +276,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { } LiveOuts.erase( - std::remove_if(LiveOuts.begin(), LiveOuts.end(), - [](const LiveOutReg &LO) { return LO.Reg == 0; }), + remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }), LiveOuts.end()); return LiveOuts; @@ -332,20 +335,26 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), std::move(LiveOuts)); - // Record the stack size of the current function. - const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + // Record the stack size of the current function and update callsite count. + const MachineFrameInfo &MFI = AP.MF->getFrameInfo(); const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); bool HasDynamicFrameSize = - MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); - FnStackSize[AP.CurrentFnSym] = - HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); + MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); + uint64_t FrameSize = HasDynamicFrameSize ? 
UINT64_MAX : MFI.getStackSize(); + + auto CurrentIt = FnInfos.find(AP.CurrentFnSym); + if (CurrentIt != FnInfos.end()) + CurrentIt->second.RecordCount++; + else + FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize))); } void StackMaps::recordStackMap(const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); - int64_t ID = MI.getOperand(0).getImm(); - recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2), + StackMapOpers opers(&MI); + const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm(); + recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()), MI.operands_end()); } @@ -353,8 +362,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); - int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - + const int64_t ID = opers.getID(); auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); @@ -363,7 +371,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { // verify anyregcc auto &Locations = CSInfos.back().Locations; if (opers.isAnyReg()) { - unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); + unsigned NArgs = opers.getNumCallArgs(); for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i) assert(Locations[i].Type == Location::Register && "anyreg arg must be in reg."); @@ -384,7 +392,7 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) { /// Emit the stackmap header. /// /// Header { -/// uint8 : Stack Map Version (currently 1) +/// uint8 : Stack Map Version (currently 2) /// uint8 : Reserved (expected to be 0) /// uint16 : Reserved (expected to be 0) /// } @@ -398,8 +406,8 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) { OS.EmitIntValue(0, 2); // Reserved. // Num functions. - DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); - OS.EmitIntValue(FnStackSize.size(), 4); + DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n'); + OS.EmitIntValue(FnInfos.size(), 4); // Num constants. DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); OS.EmitIntValue(ConstPool.size(), 4); @@ -413,15 +421,18 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) { /// StkSizeRecord[NumFunctions] { /// uint64 : Function Address /// uint64 : Stack Size +/// uint64 : Record Count /// } void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { // Function Frame records. DEBUG(dbgs() << WSMP << "functions:\n"); - for (auto const &FR : FnStackSize) { + for (auto const &FR : FnInfos) { DEBUG(dbgs() << WSMP << "function addr: " << FR.first - << " frame size: " << FR.second); + << " frame size: " << FR.second.StackSize + << " callsite count: " << FR.second.RecordCount << '\n'); OS.EmitSymbolValue(FR.first, 8); - OS.EmitIntValue(FR.second, 8); + OS.EmitIntValue(FR.second.StackSize, 8); + OS.EmitIntValue(FR.second.RecordCount, 8); } } @@ -522,7 +533,7 @@ void StackMaps::serializeToStackMapSection() { // Bail out if there's no stack map data. 
assert((!CSInfos.empty() || ConstPool.empty()) && "Expected empty constant pool too!"); - assert((!CSInfos.empty() || FnStackSize.empty()) && + assert((!CSInfos.empty() || FnInfos.empty()) && "Expected empty function record too!"); if (CSInfos.empty()) return; diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 89868e4..c2c010a 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -50,7 +50,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", +INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors", false, true) FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { @@ -236,11 +236,6 @@ bool StackProtector::RequiresStackProtector() { for (const Instruction &I : BB) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { if (AI->isArrayAllocation()) { - // SSP-Strong: Enable protectors for any call to alloca, regardless - // of size. - if (Strong) - return true; - if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index d996714..234b204 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { I != E; ++I) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - int FirstSS, SecondSS; if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS && FirstSS != -1) { @@ -392,12 +391,18 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { } MachineBasicBlock::iterator NextMI = std::next(I); - if (NextMI == MBB->end()) continue; + MachineBasicBlock::iterator ProbableLoadMI = I; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS))) continue; + // Skip the ...pseudo debugging... instructions between a load and store. 
+ while ((NextMI != E) && NextMI->isDebugValue()) { + ++NextMI; + ++I; + } + if (NextMI == E) continue; if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS))) continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; @@ -407,7 +412,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) { ++NumDead; - toErase.push_back(&*I); + toErase.push_back(&*ProbableLoadMI); } toErase.push_back(&*NextMI); @@ -428,7 +433,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { << "********** Function: " << MF.getName() << '\n'; }); - MFI = MF.getFrameInfo(); + MFI = &MF.getFrameInfo(); TII = MF.getSubtarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 2b1fb12..e2377d8 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -47,13 +47,12 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - auto MMI = getAnalysisIfAvailable<MachineModuleInfo>(); auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - Duplicator.initMF(MF, MMI, MBPI); + Duplicator.initMF(MF, MBPI, /* LayoutMode */ false); bool MadeChange = false; - while (Duplicator.tailDuplicateBlocks(MF)) + while (Duplicator.tailDuplicateBlocks()) MadeChange = true; return MadeChange; diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp index 847a093..7709236 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" @@ -40,12 +41,20 @@ STATISTIC(NumTailDupRemoved, STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumAddedPHIs, "Number of phis added"); +namespace llvm { + // Heuristic for tail duplication. 
static cl::opt<unsigned> TailDuplicateSize( "tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); +cl::opt<unsigned> TailDupIndirectBranchSize( + "tail-dup-indirect-size", + cl::desc("Maximum instructions to consider tail duplicating blocks that " + "end with indirect branches."), cl::init(20), + cl::Hidden); + static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -54,18 +63,20 @@ static cl::opt<bool> static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); -namespace llvm { - -void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin, - const MachineBranchProbabilityInfo *MBPIin) { - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - MMI = MMIin; +void TailDuplicator::initMF(MachineFunction &MFin, + const MachineBranchProbabilityInfo *MBPIin, + bool LayoutModeIn, unsigned TailDupSizeIn) { + MF = &MFin; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + MRI = &MF->getRegInfo(); + MMI = &MF->getMMI(); MBPI = MBPIin; + TailDupSize = TailDupSizeIn; assert(MBPI != nullptr && "Machine Branch Probability Info required"); + LayoutMode = LayoutModeIn; PreRegAlloc = MRI->isSSA(); } @@ -78,10 +89,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { while (MI != MBB->end()) { if (!MI->isPHI()) break; - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; + for (MachineBasicBlock *PredBB : Preds) { bool Found = false; for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); @@ -119,21 +127,31 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } /// Tail duplicate the block and cleanup. -bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, - MachineBasicBlock *MBB) { +/// \p IsSimple - return value of isSimpleBB +/// \p MBB - block to be duplicated +/// \p ForcedLayoutPred - If non-null, treat this block as the layout +/// predecessor, instead of using the ordering in MF +/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of +/// all Preds that received a copy of \p MBB. +/// \p RemovalCallback - if non-null, called just before MBB is deleted. +bool TailDuplicator::tailDuplicateAndUpdate( + bool IsSimple, MachineBasicBlock *MBB, + MachineBasicBlock *ForcedLayoutPred, + SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, + llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) { // Save the successors list. SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), MBB->succ_end()); SmallVector<MachineBasicBlock *, 8> TDBBs; SmallVector<MachineInstr *, 16> Copies; - if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies)) + if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) return false; ++NumTails; SmallVector<MachineInstr *, 8> NewPHIs; - MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); // TailBB's immediate successors are now successors of those predecessors // which duplicated TailBB. Add the predecessors as sources to the PHI @@ -145,7 +163,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, // If it is dead, remove it. 
if (isDead) { NumTailDupRemoved += MBB->size(); - removeDeadBlock(MBB); + removeDeadBlock(MBB, RemovalCallback); ++NumDeadBlocks; } @@ -216,21 +234,24 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, if (NewPHIs.size()) NumAddedPHIs += NewPHIs.size(); + if (DuplicatedPreds) + *DuplicatedPreds = std::move(TDBBs); + return true; } /// Look for small blocks that are unconditionally branched to and do not fall /// through. Tail-duplicate their instructions into their predecessors to /// eliminate (dynamic) branches. -bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { +bool TailDuplicator::tailDuplicateBlocks() { bool MadeChange = false; if (PreRegAlloc && TailDupVerify) { DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); - VerifyPHIs(MF, true); + VerifyPHIs(*MF, true); } - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) { + for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { MachineBasicBlock *MBB = &*I++; if (NumTails == TailDupLimit) @@ -238,14 +259,14 @@ bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { bool IsSimple = isSimpleBB(MBB); - if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + if (!shouldTailDuplicate(IsSimple, *MBB)) continue; - MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB); + MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); } if (PreRegAlloc && TailDupVerify) - VerifyPHIs(MF, false); + VerifyPHIs(*MF, false); return MadeChange; } @@ -334,10 +355,9 @@ void TailDuplicator::processPHI( /// the source operands due to earlier PHI translation. void TailDuplicator::duplicateInstruction( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - MachineFunction &MF, DenseMap<unsigned, RegSubRegPair> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi) { - MachineInstr *NewMI = TII->duplicate(*MI, MF); + MachineInstr *NewMI = TII->duplicate(*MI, *MF); if (PreRegAlloc) { for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -421,18 +441,14 @@ void TailDuplicator::updateSuccessorsPHIs( MachineBasicBlock *FromBB, bool isDead, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock *, 8> &Succs) { - for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(), - SE = Succs.end(); - SI != SE; ++SI) { - MachineBasicBlock *SuccBB = *SI; - for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); - II != EE; ++II) { - if (!II->isPHI()) + for (MachineBasicBlock *SuccBB : Succs) { + for (MachineInstr &MI : *SuccBB) { + if (!MI.isPHI()) break; - MachineInstrBuilder MIB(*FromBB->getParent(), II); + MachineInstrBuilder MIB(*FromBB->getParent(), MI); unsigned Idx = 0; - for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { - MachineOperand &MO = II->getOperand(i + 1); + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i + 1); if (MO.getMBB() == FromBB) { Idx = i; break; @@ -440,17 +456,17 @@ void TailDuplicator::updateSuccessorsPHIs( } assert(Idx != 0); - MachineOperand &MO0 = II->getOperand(Idx); + MachineOperand &MO0 = MI.getOperand(Idx); unsigned Reg = MO0.getReg(); if (isDead) { // Folded into the previous BB. // There could be duplicate phi source entries. FIXME: Should sdisel // or earlier pass fixed this? 
- for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) { - MachineOperand &MO = II->getOperand(i + 1); + for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { + MachineOperand &MO = MI.getOperand(i + 1); if (MO.getMBB() == FromBB) { - II->RemoveOperand(i + 1); - II->RemoveOperand(i); + MI.RemoveOperand(i + 1); + MI.RemoveOperand(i); } } } else @@ -474,8 +490,8 @@ void TailDuplicator::updateSuccessorsPHIs( unsigned SrcReg = LI->second[j].second; if (Idx != 0) { - II->getOperand(Idx).setReg(SrcReg); - II->getOperand(Idx + 1).setMBB(SrcBB); + MI.getOperand(Idx).setReg(SrcReg); + MI.getOperand(Idx + 1).setMBB(SrcBB); Idx = 0; } else { MIB.addReg(SrcReg).addMBB(SrcBB); @@ -486,8 +502,8 @@ void TailDuplicator::updateSuccessorsPHIs( for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = TDBBs[j]; if (Idx != 0) { - II->getOperand(Idx).setReg(Reg); - II->getOperand(Idx + 1).setMBB(SrcBB); + MI.getOperand(Idx).setReg(Reg); + MI.getOperand(Idx + 1).setMBB(SrcBB); Idx = 0; } else { MIB.addReg(Reg).addMBB(SrcBB); @@ -495,19 +511,20 @@ void TailDuplicator::updateSuccessorsPHIs( } } if (Idx != 0) { - II->RemoveOperand(Idx + 1); - II->RemoveOperand(Idx); + MI.RemoveOperand(Idx + 1); + MI.RemoveOperand(Idx); } } } } /// Determine if it is profitable to duplicate this block. -bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, - bool IsSimple, +bool TailDuplicator::shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB) { - // Only duplicate blocks that end with unconditional branches. - if (TailBB.canFallThrough()) + // When doing tail-duplication during layout, the block ordering is in flux, + // so canFallThrough returns a result based on incorrect information and + // should just be ignored. + if (!LayoutMode && TailBB.canFallThrough()) return false; // Don't try to tail-duplicate single-block loops. @@ -518,12 +535,24 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - if (TailDuplicateSize.getNumOccurrences() == 0 && - // FIXME: Use Function::optForSize(). - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + if (TailDupSize == 0 && + TailDuplicateSize.getNumOccurrences() == 0 && + MF->getFunction()->optForSize()) MaxDuplicateCount = 1; - else + else if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; + else + MaxDuplicateCount = TailDupSize; + + // If the block to be duplicated ends in an unanalyzable fallthrough, don't + // duplicate it. + // A similar check is necessary in MachineBlockPlacement to make sure pairs of + // blocks with unanalyzable fallthrough get layed out contiguously. + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && + TailBB.canFallThrough()) + return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there @@ -536,7 +565,7 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) - MaxDuplicateCount = 20; + MaxDuplicateCount = TailDupIndirectBranchSize; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. 
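For readers following the shouldTailDuplicate() changes above, the sketch below restates the new duplication-limit selection in plain C++. It is an illustration only: the parameter names (SizeFlagOnCmdLine, OptForSize, and so on) are stand-ins for the cl::opt occurrence check and the Function::optForSize() query used in the real code, and the indirect-branch limit defaults to 20 as in the diff.

// Simplified model of the limit selection in shouldTailDuplicate():
//  - an explicit TailDupSize passed to initMF() wins,
//  - otherwise -tail-dup-size (default 2), unless the function is optimized
//    for size and the flag was not given on the command line, in which case 1,
//  - blocks ending in indirect branches get the larger
//    -tail-dup-indirect-size limit before register allocation.
unsigned pickMaxDuplicateCount(unsigned TailDupSize,          // from initMF()
                               unsigned TailDuplicateSizeOpt, // -tail-dup-size
                               bool SizeFlagOnCmdLine,
                               bool OptForSize, bool PreRegAlloc,
                               bool HasIndirectBr,
                               unsigned TailDupIndirectBranchSize = 20) {
  unsigned MaxDuplicateCount;
  if (TailDupSize == 0 && !SizeFlagOnCmdLine && OptForSize)
    MaxDuplicateCount = 1;
  else if (TailDupSize == 0)
    MaxDuplicateCount = TailDuplicateSizeOpt;
  else
    MaxDuplicateCount = TailDupSize;

  if (HasIndirectBr && PreRegAlloc)
    MaxDuplicateCount = TailDupIndirectBranchSize;
  return MaxDuplicateCount;
}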
@@ -631,7 +660,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; if (!PredCond.empty()) @@ -649,11 +678,7 @@ bool TailDuplicator::duplicateSimpleBB( SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); bool Changed = false; - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : Preds) { if (PredBB->hasEHPadSuccessor()) continue; @@ -662,7 +687,7 @@ bool TailDuplicator::duplicateSimpleBB( MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) continue; Changed = true; @@ -670,7 +695,7 @@ bool TailDuplicator::duplicateSimpleBB( << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); + MachineBasicBlock *NextBB = PredBB->getNextNode(); // Make PredFBB explicit. if (PredCond.empty()) @@ -700,7 +725,7 @@ bool TailDuplicator::duplicateSimpleBB( if (PredTBB == NextBB && PredFBB == nullptr) PredTBB = nullptr; - TII->RemoveBranch(*PredBB); + TII->removeBranch(*PredBB); if (!PredBB->isSuccessor(NewTarget)) PredBB->replaceSuccessor(TailBB, NewTarget); @@ -710,17 +735,40 @@ bool TailDuplicator::duplicateSimpleBB( } if (PredTBB) - TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); TDBBs.push_back(PredBB); } return Changed; } +bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB) { + // EH edges are ignored by analyzeBranch. + if (PredBB->succ_size() > 1) + return false; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) + return false; + if (!PredCond.empty()) + return false; + return true; +} + /// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. -bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, - MachineBasicBlock *TailBB, +/// \p IsSimple result of isSimpleBB +/// \p TailBB Block to be duplicated. +/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor +/// instead of the previous block in MF's order. +/// \p TDBBs A vector to keep track of all blocks tail-duplicated +/// into. +/// \p Copies A vector of copy instructions inserted. Used later to +/// walk all the inserted copies and remove redundant ones. 
+bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, + MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); @@ -737,25 +785,20 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, bool Changed = false; SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : Preds) { assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() > 1) - continue; - MachineBasicBlock *PredTBB, *PredFBB; - SmallVector<MachineOperand, 4> PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - continue; - if (!PredCond.empty()) + if (!canTailDuplicate(TailBB, PredBB)) continue; + // Don't duplicate into a fall-through predecessor (at least for now). - if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + bool IsLayoutSuccessor = false; + if (ForcedLayoutPred) + IsLayoutSuccessor = (ForcedLayoutPred == PredBB); + else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + IsLayoutSuccessor = true; + if (IsLayoutSuccessor) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB @@ -764,7 +807,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. - TII->RemoveBranch(*PredBB); + TII->removeBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, RegSubRegPair> LocalVRMap; @@ -782,13 +825,15 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. - duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); + duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); } } appendCopies(PredBB, CopyInfos, Copies); // Simplify - TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch @@ -796,10 +841,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); - for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); - I != E; ++I) - PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); + for (MachineBasicBlock *Succ : TailBB->successors()) + PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); Changed = true; ++NumTailDups; @@ -808,17 +851,27 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. 
- MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); + MachineBasicBlock *PrevBB = ForcedLayoutPred; + if (!PrevBB) + PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by - // AnalyzeBranch. + // analyzeBranch. if (PrevBB->succ_size() == 1 && - !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && - PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && + // Layout preds are not always CFG preds. Check. + *PrevBB->succ_begin() == TailBB && + !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && + PriorCond.empty() && + (!PriorTBB || PriorTBB == TailBB) && + TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); + // There may be a branch to the layout successor. This is unlikely but it + // happens. The correct thing to do is to remove the branch before + // duplicating the instructions in all cases. + TII->removeBranch(*PrevBB); if (PreRegAlloc) { DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; @@ -837,11 +890,12 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); - duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); + duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } appendCopies(PrevBB, CopyInfos, Copies); } else { + TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } @@ -874,11 +928,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + for (MachineBasicBlock *PredBB : Preds) { + if (is_contained(TDBBs, PredBB)) continue; // EH edges @@ -917,10 +968,15 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, /// Remove the specified dead machine basic block from the function, updating /// the CFG. -void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { +void TailDuplicator::removeDeadBlock( + MachineBasicBlock *MBB, + llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + if (RemovalCallback) + (*RemovalCallback)(MBB); + // Remove all successors. while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end() - 1); diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index cac7e63..f082add 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -40,7 +40,7 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { /// is overridden for some targets. 
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever @@ -48,13 +48,13 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, // something different. FrameReg = RI->getFrameRegister(MF); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); + return MFI.getObjectOffset(FI) + MFI.getStackSize() - + getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); } bool TargetFrameLowering::needsFrameIndexResolution( const MachineFunction &MF) const { - return MF.getFrameInfo()->hasStackObjects(); + return MF.getFrameInfo().hasStackObjects(); } void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -84,7 +84,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, return; // Functions which call __builtin_unwind_init get all their registers saved. - bool CallsUnwindInit = MF.getMMI().callsUnwindInit(); + bool CallsUnwindInit = MF.callsUnwindInit(); const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index e7330c6..01f91b9 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -84,8 +84,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) { atInsnStart = true; - } else if (strncmp(Str, MAI.getCommentString(), - strlen(MAI.getCommentString())) == 0) { + } else if (strncmp(Str, MAI.getCommentString().data(), + MAI.getCommentString().size()) == 0) { // Stop counting as an instruction after a comment until the next // separator. atInsnStart = false; @@ -119,7 +119,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If MBB isn't immediately before MBB, insert a branch to it. if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) - InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL); + insertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL); MBB->addSuccessor(NewDest); } @@ -437,13 +437,20 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, const TargetInstrInfo &TII) { unsigned StartIdx = 0; switch (MI.getOpcode()) { - case TargetOpcode::STACKMAP: - StartIdx = 2; // Skip ID, nShadowBytes. + case TargetOpcode::STACKMAP: { + // StackMapLiveValues are foldable + StartIdx = StackMapOpers(&MI).getVarIdx(); break; + } case TargetOpcode::PATCHPOINT: { - // For PatchPoint, the call args are not foldable. - PatchPointOpers opers(&MI); - StartIdx = opers.getVarIdx(); + // For PatchPoint, the call args are not foldable (even if reported in the + // stackmap e.g. via anyregcc). + StartIdx = PatchPointOpers(&MI).getVarIdx(); + break; + } + case TargetOpcode::STATEPOINT: { + // For statepoints, fold deopt and gc arguments, but not call arguments. 
+ StartIdx = StatepointOpers(&MI).getVarIdx(); break; } default: @@ -467,7 +474,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) { MachineOperand &MO = MI.getOperand(i); - if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { + if (is_contained(Ops, i)) { unsigned SpillSize; unsigned SpillOffset; // Compute the spill slot size and offset. @@ -508,10 +515,36 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); + // If we're not folding a load into a subreg, the size of the load is the + // size of the spill slot. But if we are, we need to figure out what the + // actual load size is. + int64_t MemSize = 0; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + if (Flags & MachineMemOperand::MOStore) { + MemSize = MFI.getObjectSize(FI); + } else { + for (unsigned Idx : Ops) { + int64_t OpSize = MFI.getObjectSize(FI); + + if (auto SubReg = MI.getOperand(Idx).getSubReg()) { + unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg); + if (SubRegSize > 0 && !(SubRegSize % 8)) + OpSize = SubRegSize / 8; + } + + MemSize = std::max(MemSize, OpSize); + } + } + + assert(MemSize && "Did not expect a zero-sized stack slot"); + MachineInstr *NewMI = nullptr; if (MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); if (NewMI) @@ -530,10 +563,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, assert((!(Flags & MachineMemOperand::MOLoad) || NewMI->mayLoad()) && "Folded a use to a non-load!"); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI), + MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize, MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -550,7 +582,6 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(1 - Ops[0]); MachineBasicBlock::iterator Pos = MI; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (Flags == MachineMemOperand::MOStore) storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); @@ -792,7 +823,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, int FrameIndex = 0; if ((MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) && + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) && isLoadFromStackSlot(LoadMI, FrameIndex)) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); @@ -844,7 +876,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // simple, and a common case. int FrameIdx = 0; if (isLoadFromStackSlot(MI, FrameIdx) && - MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + MF.getFrameInfo().isImmutableObjectIndex(FrameIdx)) return true; // Avoid instructions obviously unsafe for remat. @@ -857,7 +889,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( return false; // Avoid instructions which load from potentially varying memory. 
- if (MI.mayLoad() && !MI.isInvariantLoad(AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume @@ -875,7 +907,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI.isConstantPhysReg(Reg, MF)) + if (!MRI.isConstantPhysReg(Reg)) return false; } else { // A physreg def. We can't remat it. @@ -1091,35 +1123,6 @@ int TargetInstrInfo::computeDefOperandLatency( return -1; } -unsigned TargetInstrInfo::computeOperandLatency( - const InstrItineraryData *ItinData, const MachineInstr &DefMI, - unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const { - - int DefLatency = computeDefOperandLatency(ItinData, DefMI); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - int OperLatency = 0; - if (UseMI) - OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx); - else { - unsigned DefClass = DefMI.getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - if (OperLatency >= 0) - return OperLatency; - - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} - bool TargetInstrInfo::getRegSequenceInputs( const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 6d3fe8c..003311b 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -44,6 +45,14 @@ static cl::opt<bool> JumpIsExpensiveOverride( cl::desc("Do not create extra branches to split comparison logic."), cl::Hidden); +static cl::opt<unsigned> MinimumJumpTableEntries + ("min-jump-table-entries", cl::init(4), cl::Hidden, + cl::desc("Set minimum number of entries to use a jump table.")); + +static cl::opt<unsigned> MaximumJumpTableSize + ("max-jump-table-size", cl::init(0), cl::Hidden, + cl::desc("Set maximum size of jump tables; zero for no limit.")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. 
Therefore, the default value N should be @@ -352,6 +361,11 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -488,12 +502,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize"; } -/// InitLibcallCallingConvs - Set default libcall CallingConvs. -/// +/// Set default libcall CallingConvs. static void InitLibcallCallingConvs(CallingConv::ID *CCs) { - for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { - CCs[i] = CallingConv::C; - } + for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) + CCs[LC] = CallingConv::C; } /// getFPEXT - Return the FPEXT_*_* value for the given types, or @@ -756,6 +768,24 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMCPY_ELEMENT_ATOMIC_1; + case 2: + return MEMCPY_ELEMENT_ATOMIC_2; + case 4: + return MEMCPY_ELEMENT_ATOMIC_4; + case 8: + return MEMCPY_ELEMENT_ATOMIC_8; + case 16: + return MEMCPY_ELEMENT_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } + +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -804,10 +834,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { = MaxStoresPerMemmoveOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; - SelectIsExpensive = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; - FsqrtIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; @@ -825,7 +853,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; - MinimumJumpTableEntries = 4; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -956,15 +983,11 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, return getScalarShiftAmountTy(DL, LHSTy); } -/// canOpTrap - Returns true if the operation can trap for the value type. -/// VT must be a legal type. 
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { assert(isTypeLegal(VT)); switch (Op) { default: return false; - case ISD::FDIV: - case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -1177,7 +1200,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, MachineBasicBlock *MBB) const { MachineInstr *MI = &InitialMI; MachineFunction &MF = *MI->getParent()->getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // We're handling multiple types of operands here: // PATCHPOINT MetaArgs - live-in, read only, direct @@ -1402,7 +1425,7 @@ void TargetLoweringBase::computeRegisterProperties( MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. - if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() && + if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; @@ -1754,9 +1777,41 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, } } +Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, + bool UseTLS) const { + // compiler-rt provides a variable with a magic name. Targets that do not + // link with compiler-rt may also provide such a variable. + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; + auto UnsafeStackPtr = + dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar)); + + Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); + + if (!UnsafeStackPtr) { + auto TLSModel = UseTLS ? + GlobalValue::InitialExecTLSModel : + GlobalValue::NotThreadLocal; + // The global variable is not defined yet, define it ourselves. + // We use the initial-exec TLS model because we do not support the + // variable living anywhere other than in the main executable. + UnsafeStackPtr = new GlobalVariable( + *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, + UnsafeStackPtrVar, nullptr, TLSModel); + } else { + // The variable exists, check its type and attributes. + if (UnsafeStackPtr->getValueType() != StackPtrTy) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); + if (UseTLS != UnsafeStackPtr->isThreadLocal()) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + + (UseTLS ? "" : "not ") + "be thread-local"); + } + return UnsafeStackPtr; +} + Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { if (!TM.getTargetTriple().isAndroid()) - return nullptr; + return getDefaultSafeStackPointerLocation(IRB, true); // Android provides a libc function to retrieve the address of the current // thread's unsafe stack pointer. 
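As a rough illustration of what getDefaultSafeStackPointerLocation() above expects to find, the declaration below shows a runtime-provided unsafe-stack pointer that would pass its checks: a pointer-typed, thread-local global with the magic name. This is a sketch of the contract only, not the actual compiler-rt definition; whether a given runtime defines the symbol exactly this way is an assumption.

// Hypothetical runtime-side definition matching the checks in the diff:
// the symbol must have void* type and, when UseTLS is true, be thread-local
// (the code requests the initial-exec TLS model when it has to create the
// global itself).
extern "C" {
__thread void *__safestack_unsafe_stack_ptr = nullptr;
}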
@@ -1818,9 +1873,7 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); PointerType *PtrTy = Type::getInt8PtrTy(M.getContext()); - auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy)); - Guard->setVisibility(GlobalValue::HiddenVisibility); - return Guard; + return M.getOrInsertGlobal("__guard_local", PtrTy); } return nullptr; } @@ -1840,3 +1893,207 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } + +unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { + return MinimumJumpTableEntries; +} + +void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { + MinimumJumpTableEntries = Val; +} + +unsigned TargetLoweringBase::getMaximumJumpTableSize() const { + return MaximumJumpTableSize; +} + +void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { + MaximumJumpTableSize = Val; +} + +//===----------------------------------------------------------------------===// +// Reciprocal Estimates +//===----------------------------------------------------------------------===// + +/// Get the reciprocal estimate attribute string for a function that will +/// override the target defaults. +static StringRef getRecipEstimateForFunc(MachineFunction &MF) { + const Function *F = MF.getFunction(); + StringRef RecipAttrName = "reciprocal-estimates"; + if (!F->hasFnAttribute(RecipAttrName)) + return StringRef(); + + return F->getFnAttribute(RecipAttrName).getValueAsString(); +} + +/// Construct a string for the given reciprocal operation of the given type. +/// This string should match the corresponding option to the front-end's +/// "-mrecip" flag assuming those strings have been passed through in an +/// attribute string. For example, "vec-divf" for a division of a vXf32. +static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { + std::string Name = VT.isVector() ? "vec-" : ""; + + Name += IsSqrt ? "sqrt" : "div"; + + // TODO: Handle "half" or other float types? + if (VT.getScalarType() == MVT::f64) { + Name += "d"; + } else { + assert(VT.getScalarType() == MVT::f32 && + "Unexpected FP type for reciprocal estimate"); + Name += "f"; + } + + return Name; +} + +/// Return the character position and value (a single numeric character) of a +/// customized refinement operation in the input string if it exists. Return +/// false if there is no customized refinement step count. +static bool parseRefinementStep(StringRef In, size_t &Position, + uint8_t &Value) { + const char RefStepToken = ':'; + Position = In.find(RefStepToken); + if (Position == StringRef::npos) + return false; + + StringRef RefStepString = In.substr(Position + 1); + // Allow exactly one numeric character for the additional refinement + // step parameter. + if (RefStepString.size() == 1) { + char RefStepChar = RefStepString[0]; + if (RefStepChar >= '0' && RefStepChar <= '9') { + Value = RefStepChar - '0'; + return true; + } + } + report_fatal_error("Invalid refinement step for -recip."); +} + +/// For the input attribute string, return one of the ReciprocalEstimate enum +/// status values (enabled, disabled, or not specified) for this operation on +/// the specified data type. 
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { + if (Override.empty()) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + SmallVector<StringRef, 4> OverrideVector; + SplitString(Override, OverrideVector, ","); + unsigned NumArgs = OverrideVector.size(); + + // Check if "all", "none", or "default" was specified. + if (NumArgs == 1) { + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + if (parseRefinementStep(Override, RefPos, RefSteps)) { + // Split the string for further processing. + Override = Override.substr(0, RefPos); + } + + // All reciprocal types are enabled. + if (Override == "all") + return TargetLoweringBase::ReciprocalEstimate::Enabled; + + // All reciprocal types are disabled. + if (Override == "none") + return TargetLoweringBase::ReciprocalEstimate::Disabled; + + // Target defaults for enablement are used. + if (Override == "default") + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + } + + // The attribute string may omit the size suffix ('f'/'d'). + std::string VTName = getReciprocalOpName(IsSqrt, VT); + std::string VTNameNoSize = VTName; + VTNameNoSize.pop_back(); + static const char DisabledPrefix = '!'; + + for (StringRef RecipType : OverrideVector) { + size_t RefPos; + uint8_t RefSteps; + if (parseRefinementStep(RecipType, RefPos, RefSteps)) + RecipType = RecipType.substr(0, RefPos); + + // Ignore the disablement token for string matching. + bool IsDisabled = RecipType[0] == DisabledPrefix; + if (IsDisabled) + RecipType = RecipType.substr(1); + + if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled + : TargetLoweringBase::ReciprocalEstimate::Enabled; + } + + return TargetLoweringBase::ReciprocalEstimate::Unspecified; +} + +/// For the input attribute string, return the customized refinement step count +/// for this operation on the specified data type. If the step count does not +/// exist, return the ReciprocalEstimate enum value for unspecified. +static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { + if (Override.empty()) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + SmallVector<StringRef, 4> OverrideVector; + SplitString(Override, OverrideVector, ","); + unsigned NumArgs = OverrideVector.size(); + + // Check if "all", "default", or "none" was specified. + if (NumArgs == 1) { + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + if (!parseRefinementStep(Override, RefPos, RefSteps)) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + // Split the string for further processing. + Override = Override.substr(0, RefPos); + assert(Override != "none" && + "Disabled reciprocals, but specifed refinement steps?"); + + // If this is a general override, return the specified number of steps. + if (Override == "all" || Override == "default") + return RefSteps; + } + + // The attribute string may omit the size suffix ('f'/'d'). 
+ std::string VTName = getReciprocalOpName(IsSqrt, VT); + std::string VTNameNoSize = VTName; + VTNameNoSize.pop_back(); + + for (StringRef RecipType : OverrideVector) { + size_t RefPos; + uint8_t RefSteps; + if (!parseRefinementStep(RecipType, RefPos, RefSteps)) + continue; + + RecipType = RecipType.substr(0, RefPos); + if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + return RefSteps; + } + + return TargetLoweringBase::ReciprocalEstimate::Unspecified; +} + +int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT, + MachineFunction &MF) const { + return getOpEnabled(true, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT, + MachineFunction &MF) const { + return getOpEnabled(false, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, + MachineFunction &MF) const { + return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getDivRefinementSteps(EVT VT, + MachineFunction &MF) const { + return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); +} diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 5f814c9..eb2a28f 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -50,14 +50,14 @@ using namespace dwarf; //===----------------------------------------------------------------------===// MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) return getContext().getOrCreateSymbol(StringRef("DW.ref.") + - TM.getSymbol(GV, Mang)->getName()); + TM.getSymbol(GV)->getName()); if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) - return TM.getSymbol(GV, Mang); + return TM.getSymbol(GV); report_fatal_error("We do not support this DWARF encoding yet!"); } @@ -84,20 +84,19 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( } const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { + const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_indirect) { MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); - MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM); + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM); // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. 
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { - MCSymbol *Sym = TM.getSymbol(GV, Mang); + MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -106,8 +105,8 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } - return TargetLoweringObjectFile:: - getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer); + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM, + MMI, Streamer); } static SectionKind @@ -152,6 +151,11 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { static unsigned getELFSectionType(StringRef Name, SectionKind K) { + // Use SHT_NOTE for section whose name starts with ".note" to allow + // emitting ELF notes from C variable declaration. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609 + if (Name.startswith(".note")) + return ELF::SHT_NOTE; if (Name == ".init_array") return ELF::SHT_INIT_ARRAY; @@ -177,6 +181,9 @@ static unsigned getELFSectionFlags(SectionKind K) { if (K.isText()) Flags |= ELF::SHF_EXECINSTR; + if (K.isExecuteOnly()) + Flags |= ELF::SHF_ARM_PURECODE; + if (K.isWriteable()) Flags |= ELF::SHF_WRITE; @@ -205,16 +212,15 @@ static const Comdat *getELFComdat(const GlobalValue *GV) { } MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + StringRef SectionName = GO->getSection(); // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); - if (const Comdat *C = getELFComdat(GV)) { + if (const Comdat *C = getELFComdat(GO)) { Group = C->getName(); Flags |= ELF::SHF_GROUP; } @@ -243,7 +249,7 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { } static MCSectionELF * -selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, +selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags, unsigned *NextUniqueID) { @@ -271,7 +277,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, } StringRef Group = ""; - if (const Comdat *C = getELFComdat(GV)) { + if (const Comdat *C = getELFComdat(GO)) { Flags |= ELF::SHF_GROUP; Group = C->getName(); } @@ -282,8 +288,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! - unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GV)); + unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; Name = SizeSpec + utostr(Align); @@ -293,25 +299,31 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, } else { Name = getSectionPrefixForGlobal(Kind); } - // FIXME: Extend the section prefix to include hotness catagories such as .hot - // or .unlikely for functions. 
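A minimal sketch of what the SHT_NOTE change in getELFSectionType() above enables (compiler-specific attribute syntax; the payload here is illustrative only, not a well-formed ELF note record):

// With the change above, a global whose explicit section name starts with
// ".note" is emitted with section type SHT_NOTE instead of SHT_PROGBITS,
// matching GCC behaviour (see the bugzilla link in the added comment).
__attribute__((section(".note.example"), used))
static const char ExampleNote[] = "illustrative payload only";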
+ + if (const auto *F = dyn_cast<Function>(GO)) { + const auto &OptionalPrefix = F->getSectionPrefix(); + if (OptionalPrefix) + Name += *OptionalPrefix; + } if (EmitUniqueSection && UniqueSectionNames) { Name.push_back('.'); - TM.getNameWithPrefix(Name, GV, Mang, true); + TM.getNameWithPrefix(Name, GO, Mang, true); } unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniqueSection && !UniqueSectionNames) { UniqueID = *NextUniqueID; (*NextUniqueID)++; } + // Use 0 as the unique ID for execute-only text + if (Kind.isExecuteOnly()) + UniqueID = 0; return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, EntrySize, Group, UniqueID); } MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { unsigned Flags = getELFSectionFlags(Kind); // If we have -ffunction-section or -fdata-section then we should emit the @@ -323,14 +335,14 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( else EmitUniqueSection = TM.getDataSections(); } - EmitUniqueSection |= GV->hasComdat(); + EmitUniqueSection |= GO->hasComdat(); - return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, + return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags, &NextUniqueID); } MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( - const Function &F, Mangler &Mang, const TargetMachine &TM) const { + const Function &F, const TargetMachine &TM) const { // If the function can be removed, produce a unique section so that // the table doesn't prevent the removal. const Comdat *C = F.getComdat(); @@ -339,7 +351,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( return ReadOnlySection; return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), - Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC, &NextUniqueID); } @@ -423,7 +435,7 @@ MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( } const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( - const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang, + const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { // We may only use a PLT-relative relocation to refer to unnamed_addr // functions. 
@@ -437,22 +449,28 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( return nullptr; return MCBinaryExpr::createSub( - MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind, + MCSymbolRefExpr::create(TM.getSymbol(LHS), PLTRelativeVariantKind, getContext()), - MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()), - getContext()); + MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } void TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { UseInitArray = UseInitArray_; - if (!UseInitArray) + MCContext &Ctx = getContext(); + if (!UseInitArray) { + StaticCtorSection = Ctx.getELFSection(".ctors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + + StaticDtorSection = Ctx.getELFSection(".dtors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); return; + } - StaticCtorSection = getContext().getELFSection( - ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); - StaticDtorSection = getContext().getELFSection( - ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticCtorSection = Ctx.getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticDtorSection = Ctx.getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | ELF::SHF_ALLOC); } //===----------------------------------------------------------------------===// @@ -464,11 +482,28 @@ TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() SupportIndirectSymViaGOTPCRel = true; } +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection = Ctx.getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getData()); + StaticDtorSection = Ctx.getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getData()); + } else { + StaticCtorSection = Ctx.getMachOSection("__DATA", "__mod_init_func", + MachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getData()); + StaticDtorSection = Ctx.getMachOSection("__DATA", "__mod_term_func", + MachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getData()); + } +} + /// emitModuleFlags - Perform code emission for module flags. -void TargetLoweringObjectFileMachO:: -emitModuleFlags(MCStreamer &Streamer, - ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler &Mang, const TargetMachine &TM) const { +void TargetLoweringObjectFileMachO::emitModuleFlags( + MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; MDNode *LinkerOptions = nullptr; @@ -542,23 +577,22 @@ static void checkMachOComdat(const GlobalValue *GV) { } MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; - checkMachOComdat(GV); + checkMachOComdat(GO); std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. 
- report_fatal_error("Global variable '" + GV->getName() + + report_fatal_error("Global variable '" + GO->getName() + "' has an invalid section specifier '" + - GV->getSection() + "': " + ErrorCode + "."); + GO->getSection() + "': " + ErrorCode + "."); } // Get the section. @@ -575,7 +609,7 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getName() + + report_fatal_error("Global variable '" + GO->getName() + "' section type or attributes does not match previous" " section specifier"); } @@ -584,20 +618,19 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( } MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - checkMachOComdat(GV); + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + checkMachOComdat(GO); // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; if (Kind.isThreadData()) return TLSDataSection; if (Kind.isText()) - return GV->isWeakForLinker() ? TextCoalSection : TextSection; + return GO->isWeakForLinker() ? TextCoalSection : TextSection; // If this is weak/linkonce, put this in a coalescable section, either in text // or data depending on if it is writable. - if (GV->isWeakForLinker()) { + if (GO->isWeakForLinker()) { if (Kind.isReadOnly()) return ConstTextCoalSection; return DataCoalSection; @@ -605,21 +638,21 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - GV->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. - if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - GV->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() && + GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or // 'L' can be merged, so we only try merging GVs with private linkage. - if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) { + if (GO->hasPrivateLinkage() && Kind.isMergeableConst()) { if (Kind.isMergeableConst4()) return FourByteConstantSection; if (Kind.isMergeableConst8()) @@ -670,23 +703,21 @@ MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( } const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { + const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { // The mach-o version of this method defaults to returning a stub reference. 
if (Encoding & DW_EH_PE_indirect) { MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MCSymbol *SSym = - getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { - MCSymbol *Sym = TM.getSymbol(GV, Mang); + MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -695,24 +726,24 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } - return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang, - TM, MMI, Streamer); + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM, + MMI, Streamer); } MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { // The mach-o version of this method defaults to returning a stub reference. MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { - MCSymbol *Sym = TM.getSymbol(GV, Mang); + MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -793,13 +824,16 @@ static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, } void TargetLoweringObjectFileMachO::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + SmallVectorImpl<char> &OutName, const GlobalValue *GV, const TargetMachine &TM) const { - SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM); - bool CannotUsePrivateLabel = - !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); - Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); + bool CannotUsePrivateLabel = true; + if (auto *GO = GV->getBaseObject()) { + SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM); + const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM); + CannotUsePrivateLabel = + !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); + } + getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); } //===----------------------------------------------------------------------===// @@ -886,22 +920,21 @@ static int getSelectionForCOFF(const GlobalValue *GV) { } MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); - StringRef Name = GV->getSection(); + StringRef Name = GO->getSection(); StringRef COMDATSymName = ""; - if (GV->hasComdat()) { - Selection = getSelectionForCOFF(GV); + if (GO->hasComdat()) { + Selection = 
getSelectionForCOFF(GO); const GlobalValue *ComdatGV; if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) - ComdatGV = getComdatGVForCOFF(GV); + ComdatGV = getComdatGVForCOFF(GO); else - ComdatGV = GV; + ComdatGV = GO; if (!ComdatGV->hasPrivateLinkage()) { - MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + MCSymbol *Sym = TM.getSymbol(ComdatGV); COMDATSymName = Sym->getName(); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; } else { @@ -926,8 +959,7 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { } MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // If we have -ffunction-sections then we should emit the global value to a // uniqued section specifically for it. bool EmitUniquedSection; @@ -936,32 +968,32 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( else EmitUniquedSection = TM.getDataSections(); - if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { + if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - int Selection = getSelectionForCOFF(GV); + int Selection = getSelectionForCOFF(GO); if (!Selection) Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; const GlobalValue *ComdatGV; - if (GV->hasComdat()) - ComdatGV = getComdatGVForCOFF(GV); + if (GO->hasComdat()) + ComdatGV = getComdatGVForCOFF(GO); else - ComdatGV = GV; + ComdatGV = GO; unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniquedSection) UniqueID = NextUniqueID++; if (!ComdatGV->hasPrivateLinkage()) { - MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection, UniqueID); } else { SmallString<256> TmpData; - Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); + getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, Selection, UniqueID); } @@ -986,7 +1018,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( } void TargetLoweringObjectFileCOFF::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + SmallVectorImpl<char> &OutName, const GlobalValue *GV, const TargetMachine &TM) const { bool CannotUsePrivateLabel = false; if (GV->hasPrivateLinkage() && @@ -994,11 +1026,11 @@ void TargetLoweringObjectFileCOFF::getNameWithPrefix( (isa<GlobalVariable>(GV) && TM.getDataSections()))) CannotUsePrivateLabel = true; - Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); + getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); } MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( - const Function &F, Mangler &Mang, const TargetMachine &TM) const { + const Function &F, const TargetMachine &TM) const { // If the function can be removed, produce a unique section so that // the table doesn't prevent the removal. 
const Comdat *C = F.getComdat(); @@ -1010,7 +1042,7 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( if (F.hasPrivateLinkage()) return ReadOnlySection; - MCSymbol *Sym = TM.getSymbol(&F, Mang); + MCSymbol *Sym = TM.getSymbol(&F); StringRef COMDATSymName = Sym->getName(); SectionKind Kind = SectionKind::getReadOnly(); @@ -1023,10 +1055,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF:: -emitModuleFlags(MCStreamer &Streamer, - ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler &Mang, const TargetMachine &TM) const { +void TargetLoweringObjectFileCOFF::emitModuleFlags( + MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + const TargetMachine &TM) const { MDNode *LinkerOptions = nullptr; for (const auto &MFE : ModuleFlags) { @@ -1052,6 +1083,31 @@ emitModuleFlags(MCStreamer &Streamer, } } +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + const Triple &T = TM.getTargetTriple(); + if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { + StaticCtorSection = + Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + StaticDtorSection = + Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + } else { + StaticCtorSection = Ctx.getCOFFSection( + ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()); + StaticDtorSection = Ctx.getCOFFSection( + ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()); + } +} + MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( @@ -1065,7 +1121,7 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( } void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( - raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const { + raw_ostream &OS, const GlobalValue *GV) const { if (!GV->hasDLLExportStorageClass() || GV->isDeclaration()) return; @@ -1079,14 +1135,14 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) { std::string Flag; raw_string_ostream FlagOS(Flag); - Mang.getNameWithPrefix(FlagOS, GV, false); + getMangler().getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix()) OS << Flag.substr(1); else OS << Flag; } else { - Mang.getNameWithPrefix(OS, GV, false); + getMangler().getNameWithPrefix(OS, GV, false); } if (!GV->getValueType()->isFunctionTy()) { diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 8d2048f..b6da8e0 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -29,7 +29,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers. 
if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) - return MF.getFrameInfo()->hasCalls(); + return MF.getFrameInfo().hasCalls(); return false; } diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp index b8c8209..e7ea2b4 100644 --- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -38,8 +38,8 @@ using namespace llvm; -static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, - cl::desc("Disable Post Regalloc")); +static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc Scheduler")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, cl::desc("Disable branch folding")); static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, @@ -98,6 +98,14 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +static cl::opt<int> EnableGlobalISelAbort( + "global-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"global\" instruction selection " + "fails to lower/select an instruction: 0 disable the abort, " + "1 enable the abort, and " + "2 disable the abort but emit a diagnostic on failure"), + cl::init(1)); + // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). @@ -121,8 +129,7 @@ static cl::opt<CFLAAType> UseCFLAA( clEnumValN(CFLAAType::Andersen, "anders", "Enable inclusion-based CFL-AA"), clEnumValN(CFLAAType::Both, "both", - "Enable both variants of CFL-AA"), - clEnumValEnd)); + "Enable both variants of CFL-AA"))); /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. @@ -150,7 +157,7 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, static IdentifyingPassPtr overridePass(AnalysisID StandardID, IdentifyingPassPtr TargetID) { if (StandardID == &PostRASchedulerID) - return applyDisable(TargetID, DisablePostRA); + return applyDisable(TargetID, DisablePostRASched); if (StandardID == &BranchFolderPassID) return applyDisable(TargetID, DisableBranchFold); @@ -252,8 +259,7 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), - StopAfter(nullptr), Started(true), Stopped(false), + : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true) { @@ -347,6 +353,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { if (StartBefore == PassID) Started = true; + if (StopBefore == PassID) + Stopped = true; if (Started && !Stopped) { std::string Banner; // Construct banner message before PM->add() as that may delete the pass. @@ -469,12 +477,17 @@ void TargetPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) addPass(createPartiallyInlineLibCallsPass()); + + // Insert calls to mcount-like functions. 
+ addPass(createCountingFunctionInserterPass()); } /// Turn exception handling constructs into something the code generators can /// handle. void TargetPassConfig::addPassesToHandleExceptions() { - switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + const MCAsmInfo *MCAI = TM->getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + switch (MCAI->getExceptionHandlingType()) { case ExceptionHandling::SjLj: // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, @@ -483,7 +496,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. addPass(createSjLjEHPreparePass()); - // FALLTHROUGH + LLVM_FALLTHROUGH; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: addPass(createDwarfEHPass(TM)); @@ -557,9 +570,6 @@ void TargetPassConfig::addISelPrepare() { void TargetPassConfig::addMachinePasses() { AddingMachinePasses = true; - if (TM->Options.EnableIPRA) - addPass(createRegUsageInfoPropPass()); - // Insert a machine instr printer pass after the specified pass. if (!StringRef(PrintMachineInstrs.getValue()).equals("") && !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) { @@ -575,6 +585,9 @@ void TargetPassConfig::addMachinePasses() { // Print the instruction selected machine code... printAndVerify("After Instruction Selection"); + if (TM->Options.EnableIPRA) + addPass(createRegUsageInfoPropPass()); + // Expand pseudo-instructions emitted by ISel. addPass(&ExpandISelPseudosID); @@ -886,3 +899,14 @@ void TargetPassConfig::addBlockPlacement() { addPass(&MachineBlockPlacementStatsID); } } + +//===---------------------------------------------------------------------===// +/// GlobalISel Configuration +//===---------------------------------------------------------------------===// +bool TargetPassConfig::isGlobalISelAbortEnabled() const { + return EnableGlobalISelAbort == 1; +} + +bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { + return EnableGlobalISelAbort == 2; +} diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e1d90cb..cd50c5b 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -30,8 +30,8 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, - const unsigned *SRILaneMasks, - unsigned SRICoveringLanes) + const LaneBitmask *SRILaneMasks, + LaneBitmask SRICoveringLanes) : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE), @@ -40,6 +40,36 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} +void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg) + const { + for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI) + RegisterSet.set(*AI); +} + +bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, + ArrayRef<MCPhysReg> Exceptions) const { + // Check that all super registers of reserved regs are reserved as well. 
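A hedged sketch of how the -global-isel-abort values might be consumed through the two TargetPassConfig hooks added above (hypothetical helper; the include paths and the exact fallback reporting are assumptions, not part of this patch):

#include "llvm/CodeGen/Passes.h"          // TargetPassConfig is declared here in this tree
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper, not part of this patch.
static void handleGISelFailure(const llvm::TargetPassConfig &TPC,
                               const char *Msg) {
  if (TPC.isGlobalISelAbortEnabled())                // -global-isel-abort=1
    llvm::report_fatal_error(Msg);
  if (TPC.reportDiagnosticWhenGlobalISelFallback())  // -global-isel-abort=2
    llvm::errs() << "warning: GlobalISel fallback: " << Msg << "\n";
  // With -global-isel-abort=0 the fallback stays silent.
}

On the command line this corresponds to something like "llc -global-isel -global-isel-abort=2 file.ll", which keeps compiling but reports each instruction-selection fallback.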
+ BitVector Checked(getNumRegs()); + for (int Reg = RegisterSet.find_first(); Reg>=0; + Reg = RegisterSet.find_next(Reg)) { + if (Checked[Reg]) + continue; + for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) { + if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) { + dbgs() << "Error: Super register " << PrintReg(*SR, this) + << " of reserved register " << PrintReg(Reg, this) + << " is not reserved.\n"; + return false; + } + + // We transitively check superregs. So we can remember this for later + // to avoid compiletime explosion in deep register hierarchies. + Checked.set(*SR); + } + } + return true; +} + namespace llvm { Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, @@ -97,12 +127,6 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable PrintLaneMask(LaneBitmask LaneMask) { - return Printable([LaneMask](raw_ostream &OS) { - OS << format("%08X", LaneMask); - }); -} - } // End of llvm namespace /// getAllocatableClass - Return the maximal subclass of the given register @@ -354,7 +378,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // Check that Phys is in the allocation order. We shouldn't heed hints // from VirtReg's register class if they aren't in the allocation order. The // target probably has a reason for removing the register. - if (std::find(Order.begin(), Order.end(), Phys) == Order.end()) + if (!is_contained(Order, Phys)) return; // All clear, tell the register allocator to prefer this register. @@ -367,11 +391,11 @@ bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { bool TargetRegisterInfo::needsStackRealignment( const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const Function *F = MF.getFunction(); unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 022e912..83e52d3 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -144,7 +144,7 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { unsigned UseIdx = 0; for (unsigned i = 0; i != UseOperIdx; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.readsReg()) + if (MO.isReg() && MO.readsReg() && !MO.isDef()) ++UseIdx; } return UseIdx; diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp new file mode 100644 index 0000000..c74707d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -0,0 +1,54 @@ +//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file describes the general parts of a Subtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +//--------------------------------------------------------------------------- +// TargetSubtargetInfo Class +// +TargetSubtargetInfo::TargetSubtargetInfo( + const Triple &TT, StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, + const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { +} + +TargetSubtargetInfo::~TargetSubtargetInfo() {} + +bool TargetSubtargetInfo::enableAtomicExpand() const { + return true; +} + +bool TargetSubtargetInfo::enableMachineScheduler() const { + return false; +} + +bool TargetSubtargetInfo::enableJoinGlobalCopies() const { + return enableMachineScheduler(); +} + +bool TargetSubtargetInfo::enableRALocalReassignment( + CodeGenOpt::Level OptLevel) const { + return true; +} + +bool TargetSubtargetInfo::enablePostRAScheduler() const { + return getSchedModel().PostRAScheduler; +} + +bool TargetSubtargetInfo::useAA() const { + return false; +} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 8feb18b..0f1b2ed 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -109,7 +109,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, unsigned Dist); - bool commuteInstruction(MachineInstr *MI, + bool commuteInstruction(MachineInstr *MI, unsigned DstIdx, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); @@ -651,6 +651,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, /// Commute a two-address instruction and update the basic block, distance map, /// and live variables if needed. Return true if it is successful. bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, + unsigned DstIdx, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist) { @@ -671,7 +672,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, // Update source register map. unsigned FromRegC = getMappedReg(RegC, SrcRegMap); if (FromRegC) { - unsigned RegA = MI->getOperand(0).getReg(); + unsigned RegA = MI->getOperand(DstIdx).getReg(); SrcRegMap[RegA] = FromRegC; } @@ -1171,6 +1172,9 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, unsigned BaseOpIdx, bool BaseOpKilled, unsigned Dist) { + if (!MI->isCommutable()) + return false; + unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); @@ -1180,7 +1184,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // and OtherOpIdx are commutable, it does not really search for // other commutable operands and does not change the values of passed // variables. 
- if (OtherOpIdx == BaseOpIdx || + if (OtherOpIdx == BaseOpIdx || !MI->getOperand(OtherOpIdx).isReg() || !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx)) continue; @@ -1199,7 +1203,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, } // If it's profitable to commute, try to do so. - if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) { + if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx, + Dist)) { ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1567,14 +1572,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. for (MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); - MO.setSubReg(0); + MO.setSubReg(MO.getSubReg()); } } } diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 501e01c..c2db56a 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -40,7 +40,7 @@ using namespace llvm; static bool eliminateUnreachableBlock(Function &F) { - SmallPtrSet<BasicBlock*, 8> Reachable; + df_iterator_default_set<BasicBlock*> Reachable; // Mark all reachable blocks. for (BasicBlock *BB : depth_first_ext(&F, Reachable)) @@ -130,7 +130,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { } bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { - SmallPtrSet<MachineBasicBlock*, 8> Reachable; + df_iterator_default_set<MachineBasicBlock*> Reachable; bool ModifiedPHI = false; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 8a3a032..0d506d6 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -73,8 +73,8 @@ void VirtRegMap::grow() { } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { - int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); ++NumSpillSlots; return SS; } @@ -110,7 +110,7 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); assert((SS >= 0 || - (SS >= MF->getFrameInfo()->getObjectIndexBegin())) && + (SS >= MF->getFrameInfo().getObjectIndexBegin())) && "illegal fixed frame index"); Virt2StackSlotMap[virtReg] = SS; } @@ -177,7 +177,7 @@ public: bool runOnMachineFunction(MachineFunction&) override; MachineFunctionProperties getSetProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; } // end anonymous namespace @@ -266,7 +266,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, SlotIndex MBBBegin = MBBI->first; // Advance all subrange iterators so that their end position is just // behind MBBBegin (or the iterator is at the end). 
- LaneBitmask LaneMask = 0; + LaneBitmask LaneMask; for (auto &RangeIterPair : SubRanges) { const LiveInterval::SubRange *SR = RangeIterPair.first; LiveInterval::const_iterator &SRI = RangeIterPair.second; @@ -277,7 +277,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, if (SRI->start <= MBBBegin) LaneMask |= SR->LaneMask; } - if (LaneMask == 0) + if (LaneMask.none()) continue; MachineBasicBlock *MBB = MBBI->second; MBB->addLiveIn(PhysReg, LaneMask); @@ -338,10 +338,11 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { assert(LI.liveAt(BaseIndex) && "Reads of completely dead register should be marked undef already"); unsigned SubRegIdx = MO.getSubReg(); + assert(SubRegIdx != 0 && LI.hasSubRanges()); LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); // See if any of the relevant subregister liveranges is defined at this point. for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex)) + if ((SR.LaneMask & UseMask).any() && SR.liveAt(BaseIndex)) return false; } return true; diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 041fb7b..568720c 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -62,7 +62,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; - const char *getPassName() const override { + StringRef getPassName() const override { return "Windows exception handling preparation"; } @@ -521,7 +521,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) { // Create the entry for this cleanup with the appropriate handler - // properties. Finaly and fault handlers are distinguished by arity. + // properties. Finally and fault handlers are distinguished by arity. ClrHandlerType HandlerType = (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault : ClrHandlerType::Finally); @@ -708,7 +708,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F) { void WinEHPrepare::cloneCommonBlocks(Function &F) { // We need to clone all blocks which belong to multiple funclets. Values are - // remapped throughout the funclet to propogate both the new instructions + // remapped throughout the funclet to propagate both the new instructions // *and* the new basic blocks themselves. for (auto &Funclets : FuncletBlocks) { BasicBlock *FuncletPadBB = Funclets.first; @@ -1202,8 +1202,12 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, Goto->setSuccessor(0, PHIBlock); CatchRet->setSuccessor(NewBlock); // Update the color mapping for the newly split edge. + // Grab a reference to the ColorVector to be inserted before getting the + // reference to the vector we are copying because inserting the new + // element in BlockColors might cause the map to be reallocated. + ColorVector &ColorsForNewBlock = BlockColors[NewBlock]; ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock]; - BlockColors[NewBlock] = ColorsForPHIBlock; + ColorsForNewBlock = ColorsForPHIBlock; for (BasicBlock *FuncletPad : ColorsForPHIBlock) FuncletBlocks[FuncletPad].push_back(NewBlock); // Treat the new block as incoming for load insertion. 
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp index 1f95708..63bd762 100644 --- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -34,7 +34,82 @@ struct XRayInstrumentation : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override; + +private: + // Replace the original RET instruction with the exit sled code ("patchable + // ret" pseudo-instruction), so that at runtime XRay can replace the sled + // with a code jumping to XRay trampoline, which calls the tracing handler + // and, in the end, issues the RET instruction. + // This is the approach to go on CPUs which have a single RET instruction, + // like x86/x86_64. + void replaceRetWithPatchableRet(MachineFunction &MF, + const TargetInstrInfo *TII); + + // Prepend the original return instruction with the exit sled code ("patchable + // function exit" pseudo-instruction), preserving the original return + // instruction just after the exit sled code. + // This is the approach to go on CPUs which have multiple options for the + // return instruction, like ARM. For such CPUs we can't just jump into the + // XRay trampoline and issue a single return instruction there. We rather + // have to call the trampoline and return from it to the original return + // instruction of the function being instrumented. + void prependRetWithPatchableExit(MachineFunction &MF, + const TargetInstrInfo *TII); }; +} // anonymous namespace + +void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, + const TargetInstrInfo *TII) +{ + // We look for *all* terminators and returns, then replace those with + // PATCHABLE_RET instructions. + SmallVector<MachineInstr *, 4> Terminators; + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + unsigned Opc = 0; + if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { + // Replace return instructions with: + // PATCHABLE_RET <Opcode>, <Operand>... + Opc = TargetOpcode::PATCHABLE_RET; + } + if (TII->isTailCall(T)) { + // Treat the tail call as a return instruction, which has a + // different-looking sled than the normal return case. + Opc = TargetOpcode::PATCHABLE_TAIL_CALL; + } + if (Opc != 0) { + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)) + .addImm(T.getOpcode()); + for (auto &MO : T.operands()) + MIB.addOperand(MO); + Terminators.push_back(&T); + } + } + } + + for (auto &I : Terminators) + I->eraseFromParent(); +} + +void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, + const TargetInstrInfo *TII) +{ + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + unsigned Opc = 0; + if (T.isReturn()) { + Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT; + } + if (TII->isTailCall(T)) { + Opc = TargetOpcode::PATCHABLE_TAIL_CALL; + } + if (Opc != 0) { + // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or + // PATCHABLE_TAIL_CALL . + BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc)); + } + } + } } bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { @@ -54,39 +129,43 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; // Function is too small. } + // We look for the first non-empty MachineBasicBlock, so that we can insert + // the function instrumentation in the appropriate place. + auto MBI = + find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); + if (MBI == MF.end()) + return false; // The function is empty. 
+ + auto *TII = MF.getSubtarget().getInstrInfo(); + auto &FirstMBB = *MBI; + auto &FirstMI = *FirstMBB.begin(); + + if (!MF.getSubtarget().isXRaySupported()) { + FirstMI.emitError("An attempt to perform XRay instrumentation for an" + " unsupported target."); + return false; + } + // FIXME: Do the loop triviality analysis here or in an earlier pass. // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the // MachineFunction. - auto &FirstMBB = *MF.begin(); - auto &FirstMI = *FirstMBB.begin(); - auto *TII = MF.getSubtarget().getInstrInfo(); BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); - // Then we look for *all* terminators and returns, then replace those with - // PATCHABLE_RET instructions. - SmallVector<MachineInstr *, 4> Terminators; - for (auto &MBB : MF) { - for (auto &T : MBB.terminators()) { - // FIXME: Handle tail calls here too? - if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { - // Replace return instructions with: - // PATCHABLE_RET <Opcode>, <Operand>... - auto MIB = BuildMI(MBB, T, T.getDebugLoc(), - TII->get(TargetOpcode::PATCHABLE_RET)) - .addImm(T.getOpcode()); - for (auto &MO : T.operands()) - MIB.addOperand(MO); - Terminators.push_back(&T); - break; - } - } + switch (MF.getTarget().getTargetTriple().getArch()) { + case Triple::ArchType::arm: + case Triple::ArchType::thumb: + case Triple::ArchType::aarch64: + // For the architectures which don't have a single return instruction + prependRetWithPatchableExit(MF, TII); + break; + default: + // For the architectures that have a single return instruction (such as + // RETQ on x86_64). + replaceRetWithPatchableRet(MF, TII); + break; } - - for (auto &I : Terminators) - I->eraseFromParent(); - return true; } |
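A hedged usage sketch for the XRay instrumentation pass above: the early-exit checks of runOnMachineFunction() (in the part of the function not shown in this hunk) are driven by function attributes, so a front end opts a function in roughly as follows, assuming the conventional XRay attribute names (an assumption; they do not appear in this diff):

#include "llvm/IR/Function.h"

// Hypothetical helper, not part of this patch; attribute names are assumed.
static void markForXRay(llvm::Function &F) {
  // Force entry/exit sleds regardless of the instruction-count threshold.
  F.addFnAttr("function-instrument", "xray-always");
  // Or instrument only functions with at least this many instructions:
  // F.addFnAttr("xray-instruction-threshold", "200");
}

As the switch over getArch() above shows, targets with a single return instruction (such as x86_64) get their returns rewritten into PATCHABLE_RET, while arm, thumb and aarch64 keep the original return and have a PATCHABLE_FUNCTION_EXIT prepended instead.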